netfs_read_folio() needs to handle partially-valid pages that are marked
dirty but not uptodate, in the event that someone tries to read a page that
was used to cache data by a streaming write.

In such a case, make netfs_read_folio() set up a bvec iterator that points
to the parts of the folio that need filling and to a sink page for the data
that should be discarded and use that instead of i_pages as the iterator to
be written to.

This requires netfs_rreq_unlock_folios() to convert the page into a normal
dirty uptodate page, getting rid of the partial write record and bumping
the group pointer over to folio->private.

Signed-off-by: David Howells <dhowe...@redhat.com>
cc: Jeff Layton <jlay...@kernel.org>
cc: linux-cachefs@redhat.com
cc: linux-fsde...@vger.kernel.org
cc: linux...@kvack.org
---
 fs/netfs/buffered_read.c     | 61 ++++++++++++++++++++++++++++++++++--
 include/trace/events/netfs.h |  2 ++
 2 files changed, 60 insertions(+), 3 deletions(-)

diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c
index 2f06344bba21..374707df6575 100644
--- a/fs/netfs/buffered_read.c
+++ b/fs/netfs/buffered_read.c
@@ -16,6 +16,7 @@
 void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
 {
        struct netfs_io_subrequest *subreq;
+       struct netfs_folio *finfo;
        struct folio *folio;
        pgoff_t start_page = rreq->start / PAGE_SIZE;
        pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1;
@@ -86,6 +87,15 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
 
                if (!pg_failed) {
                        flush_dcache_folio(folio);
+                       finfo = netfs_folio_info(folio);
+                       if (finfo) {
+                               trace_netfs_folio(folio, netfs_folio_trace_filled_gaps);
+                               if (finfo->netfs_group)
+                                       folio_change_private(folio, finfo->netfs_group);
+                               else
+                                       folio_detach_private(folio);
+                               kfree(finfo);
+                       }
                        folio_mark_uptodate(folio);
                }
 
@@ -245,6 +255,7 @@ int netfs_read_folio(struct file *file, struct folio *folio)
        struct address_space *mapping = folio_file_mapping(folio);
        struct netfs_io_request *rreq;
        struct netfs_inode *ctx = netfs_inode(mapping->host);
+       struct folio *sink = NULL;
        int ret;
 
        _enter("%lx", folio_index(folio));
@@ -265,12 +276,56 @@ int netfs_read_folio(struct file *file, struct folio *folio)
        trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage);
 
        /* Set up the output buffer */
-       iov_iter_xarray(&rreq->iter, ITER_DEST, &mapping->i_pages,
-                       rreq->start, rreq->len);
+       if (folio_test_dirty(folio)) {
+               /* Handle someone trying to read from an unflushed streaming
+                * write.  We fiddle the buffer so that a gap at the beginning
+                * and/or a gap at the end get copied to, but the middle is
+                * discarded.
+                */
+               struct netfs_folio *finfo = netfs_folio_info(folio);
+               struct bio_vec *bvec;
+               unsigned int from = finfo->dirty_offset;
+               unsigned int to = from + finfo->dirty_len;
+               unsigned int off = 0, i = 0;
+               size_t flen = folio_size(folio);
+               size_t nr_bvec = flen / PAGE_SIZE + 2;
+               size_t part;
+
+               ret = -ENOMEM;
+               bvec = kmalloc_array(nr_bvec, sizeof(*bvec), GFP_KERNEL);
+               if (!bvec)
+                       goto discard;
+
+               sink = folio_alloc(GFP_KERNEL, 0);
+               if (!sink)
+                       goto discard;
+
+               trace_netfs_folio(folio, netfs_folio_trace_read_gaps);
+
+               rreq->direct_bv = bvec;
+               rreq->direct_bv_count = nr_bvec;
+               if (from > 0) {
+                       bvec_set_folio(&bvec[i++], folio, from, 0);
+                       off = from;
+               }
+               while (off < to) {
+                       part = min_t(size_t, to - off, PAGE_SIZE);
+                       bvec_set_folio(&bvec[i++], sink, part, 0);
+                       off += part;
+               }
+               if (to < flen)
+                       bvec_set_folio(&bvec[i++], folio, flen - to, to);
+               iov_iter_bvec(&rreq->iter, ITER_DEST, bvec, i, rreq->len);
+       } else {
+               iov_iter_xarray(&rreq->iter, ITER_DEST, &mapping->i_pages,
+                               rreq->start, rreq->len);
+       }
 
        ret = netfs_begin_read(rreq, true);
+       if (sink)
+               folio_put(sink);
        netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
-       return ret;
+       return ret < 0 ? ret : 0;
 
 discard:
        netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h
index 94793f842000..b7426f455086 100644
--- a/include/trace/events/netfs.h
+++ b/include/trace/events/netfs.h
@@ -115,9 +115,11 @@
        EM(netfs_folio_trace_clear,             "clear")        \
        EM(netfs_folio_trace_clear_s,           "clear-s")      \
        EM(netfs_folio_trace_clear_g,           "clear-g")      \
+       EM(netfs_folio_trace_filled_gaps,       "filled-gaps")  \
        EM(netfs_folio_trace_kill,              "kill")         \
        EM(netfs_folio_trace_mkwrite,           "mkwrite")      \
        EM(netfs_folio_trace_mkwrite_plus,      "mkwrite+")     \
+       EM(netfs_folio_trace_read_gaps,         "read-gaps")    \
        EM(netfs_folio_trace_redirty,           "redirty")      \
        EM(netfs_folio_trace_redirtied,         "redirtied")    \
        EM(netfs_folio_trace_store,             "store")        \
--
Linux-cachefs mailing list
Linux-cachefs@redhat.com
https://listman.redhat.com/mailman/listinfo/linux-cachefs

Reply via email to