On Tue, 2022-03-08 at 23:29 +0000, David Howells wrote:
> Split fs/netfs/read_helper.c into two pieces, one to deal with buffered
> writes and one to deal with the I/O mechanism.
> 

I think you mean "buffered reads" rather than "buffered writes" here?

> Changes
> =======
> ver #2)
>  - Add kdoc reference to new file.
> 
> Signed-off-by: David Howells <dhowe...@redhat.com>
> cc: linux-cachefs@redhat.com
> 
> Link: https://lore.kernel.org/r/164623005586.3564931.6149556072728481767.st...@warthog.procyon.org.uk/ # v1
> ---
> 
>  fs/netfs/Makefile        |    1 
>  fs/netfs/buffered_read.c |  428 ++++++++++++++++++++++++++++++++++++++++++++++
>  fs/netfs/io.c            |  418 ---------------------------------------------
>  3 files changed, 429 insertions(+), 418 deletions(-)
>  create mode 100644 fs/netfs/buffered_read.c
> 
> diff --git a/fs/netfs/Makefile b/fs/netfs/Makefile
> index 51ece4f7bc77..88b904532bc7 100644
> --- a/fs/netfs/Makefile
> +++ b/fs/netfs/Makefile
> @@ -1,6 +1,7 @@
>  # SPDX-License-Identifier: GPL-2.0
>  
>  netfs-y := \
> +     buffered_read.o \
>       io.o \
>       objects.o
>  
> diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c
> new file mode 100644
> index 000000000000..09ba7097a970
> --- /dev/null
> +++ b/fs/netfs/buffered_read.c
> @@ -0,0 +1,428 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/* Network filesystem high-level buffered read support.
> + *
> + * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
> + * Written by David Howells (dhowe...@redhat.com)
> + */
> +
> +#include <linux/export.h>
> +#include <linux/task_io_accounting_ops.h>
> +#include "internal.h"
> +
> +/*
> + * Unlock the folios in a read operation.  We need to set PG_fscache on any
> + * folios we're going to write back before we unlock them.
> + */
> +void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
> +{
> +     struct netfs_io_subrequest *subreq;
> +     struct folio *folio;
> +     unsigned int iopos, account = 0;
> +     pgoff_t start_page = rreq->start / PAGE_SIZE;
> +     pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1;
> +     bool subreq_failed = false;
> +
> +     XA_STATE(xas, &rreq->mapping->i_pages, start_page);
> +
> +     if (test_bit(NETFS_RREQ_FAILED, &rreq->flags)) {
> +             __clear_bit(NETFS_RREQ_COPY_TO_CACHE, &rreq->flags);
> +             list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
> +                     __clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
> +             }
> +     }
> +
> +     /* Walk through the pagecache and the I/O request lists simultaneously.
> +      * We may have a mixture of cached and uncached sections and we only
> +      * really want to write out the uncached sections.  This is slightly
> +      * complicated by the possibility that we might have huge pages with a
> +      * mixture inside.
> +      */
> +     subreq = list_first_entry(&rreq->subrequests,
> +                               struct netfs_io_subrequest, rreq_link);
> +     iopos = 0;
> +     subreq_failed = (subreq->error < 0);
> +
> +     trace_netfs_rreq(rreq, netfs_rreq_trace_unlock);
> +
> +     rcu_read_lock();
> +     xas_for_each(&xas, folio, last_page) {
> +             unsigned int pgpos = (folio_index(folio) - start_page) * 
> PAGE_SIZE;
> +             unsigned int pgend = pgpos + folio_size(folio);
> +             bool pg_failed = false;
> +
> +             for (;;) {
> +                     if (!subreq) {
> +                             pg_failed = true;
> +                             break;
> +                     }
> +                     if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags))
> +                             folio_start_fscache(folio);
> +                     pg_failed |= subreq_failed;
> +                     if (pgend < iopos + subreq->len)
> +                             break;
> +
> +                     account += subreq->transferred;
> +                     iopos += subreq->len;
> +                     if (!list_is_last(&subreq->rreq_link, 
> &rreq->subrequests)) {
> +                             subreq = list_next_entry(subreq, rreq_link);
> +                             subreq_failed = (subreq->error < 0);
> +                     } else {
> +                             subreq = NULL;
> +                             subreq_failed = false;
> +                     }
> +                     if (pgend == iopos)
> +                             break;
> +             }
> +
> +             if (!pg_failed) {
> +                     flush_dcache_folio(folio);
> +                     folio_mark_uptodate(folio);
> +             }
> +
> +             if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
> +                     if (folio_index(folio) == rreq->no_unlock_folio &&
> +                         test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags))
> +                             _debug("no unlock");
> +                     else
> +                             folio_unlock(folio);
> +             }
> +     }
> +     rcu_read_unlock();
> +
> +     task_io_account_read(account);
> +     if (rreq->netfs_ops->done)
> +             rreq->netfs_ops->done(rreq);
> +}
> +
> +static void netfs_cache_expand_readahead(struct netfs_io_request *rreq,
> +                                      loff_t *_start, size_t *_len, loff_t 
> i_size)
> +{
> +     struct netfs_cache_resources *cres = &rreq->cache_resources;
> +
> +     if (cres->ops && cres->ops->expand_readahead)
> +             cres->ops->expand_readahead(cres, _start, _len, i_size);
> +}
> +
> +static void netfs_rreq_expand(struct netfs_io_request *rreq,
> +                           struct readahead_control *ractl)
> +{
> +     /* Give the cache a chance to change the request parameters.  The
> +      * resultant request must contain the original region.
> +      */
> +     netfs_cache_expand_readahead(rreq, &rreq->start, &rreq->len, 
> rreq->i_size);
> +
> +     /* Give the netfs a chance to change the request parameters.  The
> +      * resultant request must contain the original region.
> +      */
> +     if (rreq->netfs_ops->expand_readahead)
> +             rreq->netfs_ops->expand_readahead(rreq);
> +
> +     /* Expand the request if the cache wants it to start earlier.  Note
> +      * that the expansion may get further extended if the VM wishes to
> +      * insert THPs and the preferred start and/or end wind up in the middle
> +      * of THPs.
> +      *
> +      * If this is the case, however, the THP size should be an integer
> +      * multiple of the cache granule size, so we get a whole number of
> +      * granules to deal with.
> +      */
> +     if (rreq->start  != readahead_pos(ractl) ||
> +         rreq->len != readahead_length(ractl)) {
> +             readahead_expand(ractl, rreq->start, rreq->len);
> +             rreq->start  = readahead_pos(ractl);
> +             rreq->len = readahead_length(ractl);
> +
> +             trace_netfs_read(rreq, readahead_pos(ractl), 
> readahead_length(ractl),
> +                              netfs_read_trace_expanded);
> +     }
> +}
> +
> +/**
> + * netfs_readahead - Helper to manage a read request
> + * @ractl: The description of the readahead request
> + *
> + * Fulfil a readahead request by drawing data from the cache if possible, or
> + * the netfs if not.  Space beyond the EOF is zero-filled.  Multiple I/O
> + * requests from different sources will get munged together.  If necessary, 
> the
> + * readahead window can be expanded in either direction to a more convenient
> + * alighment for RPC efficiency or to make storage in the cache feasible.
> + *
> + * The calling netfs must initialise a netfs context contiguous to the vfs
> + * inode before calling this.
> + *
> + * This is usable whether or not caching is enabled.
> + */
> +void netfs_readahead(struct readahead_control *ractl)
> +{
> +     struct netfs_io_request *rreq;
> +     struct netfs_i_context *ctx = netfs_i_context(ractl->mapping->host);
> +     int ret;
> +
> +     _enter("%lx,%x", readahead_index(ractl), readahead_count(ractl));
> +
> +     if (readahead_count(ractl) == 0)
> +             return;
> +
> +     rreq = netfs_alloc_request(ractl->mapping, ractl->file,
> +                                readahead_pos(ractl),
> +                                readahead_length(ractl),
> +                                NETFS_READAHEAD);
> +     if (IS_ERR(rreq))
> +             return;
> +
> +     if (ctx->ops->begin_cache_operation) {
> +             ret = ctx->ops->begin_cache_operation(rreq);
> +             if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
> +                     goto cleanup_free;
> +     }
> +
> +     netfs_stat(&netfs_n_rh_readahead);
> +     trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
> +                      netfs_read_trace_readahead);
> +
> +     netfs_rreq_expand(rreq, ractl);
> +
> +     /* Drop the refs on the folios here rather than in the cache or
> +      * filesystem.  The locks will be dropped in netfs_rreq_unlock().
> +      */
> +     while (readahead_folio(ractl))
> +             ;
> +
> +     netfs_begin_read(rreq, false);
> +     return;
> +
> +cleanup_free:
> +     netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
> +     return;
> +}
> +EXPORT_SYMBOL(netfs_readahead);
> +
> +/**
> + * netfs_readpage - Helper to manage a readpage request
> + * @file: The file to read from
> + * @subpage: A subpage of the folio to read
> + *
> + * Fulfil a readpage request by drawing data from the cache if possible, or 
> the
> + * netfs if not.  Space beyond the EOF is zero-filled.  Multiple I/O requests
> + * from different sources will get munged together.
> + *
> + * The calling netfs must initialise a netfs context contiguous to the vfs
> + * inode before calling this.
> + *
> + * This is usable whether or not caching is enabled.
> + */
> +int netfs_readpage(struct file *file, struct page *subpage)
> +{
> +     struct folio *folio = page_folio(subpage);
> +     struct address_space *mapping = folio_file_mapping(folio);
> +     struct netfs_io_request *rreq;
> +     struct netfs_i_context *ctx = netfs_i_context(mapping->host);
> +     int ret;
> +
> +     _enter("%lx", folio_index(folio));
> +
> +     rreq = netfs_alloc_request(mapping, file,
> +                                folio_file_pos(folio), folio_size(folio),
> +                                NETFS_READPAGE);
> +     if (IS_ERR(rreq)) {
> +             ret = PTR_ERR(rreq);
> +             goto alloc_error;
> +     }
> +
> +     if (ctx->ops->begin_cache_operation) {
> +             ret = ctx->ops->begin_cache_operation(rreq);
> +             if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
> +                     goto discard;
> +     }
> +
> +     netfs_stat(&netfs_n_rh_readpage);
> +     trace_netfs_read(rreq, rreq->start, rreq->len, 
> netfs_read_trace_readpage);
> +     return netfs_begin_read(rreq, true);
> +
> +discard:
> +     netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
> +alloc_error:
> +     folio_unlock(folio);
> +     return ret;
> +}
> +EXPORT_SYMBOL(netfs_readpage);
> +
> +/*
> + * Prepare a folio for writing without reading first
> + * @folio: The folio being prepared
> + * @pos: starting position for the write
> + * @len: length of write
> + * @always_fill: T if the folio should always be completely filled/cleared
> + *
> + * In some cases, write_begin doesn't need to read at all:
> + * - full folio write
> + * - write that lies in a folio that is completely beyond EOF
> + * - write that covers the folio from start to EOF or beyond it
> + *
> + * If any of these criteria are met, then zero out the unwritten parts
> + * of the folio and return true. Otherwise, return false.
> + */
> +static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t 
> len,
> +                              bool always_fill)
> +{
> +     struct inode *inode = folio_inode(folio);
> +     loff_t i_size = i_size_read(inode);
> +     size_t offset = offset_in_folio(folio, pos);
> +     size_t plen = folio_size(folio);
> +
> +     if (unlikely(always_fill)) {
> +             if (pos - offset + len <= i_size)
> +                     return false; /* Page entirely before EOF */
> +             zero_user_segment(&folio->page, 0, plen);
> +             folio_mark_uptodate(folio);
> +             return true;
> +     }
> +
> +     /* Full folio write */
> +     if (offset == 0 && len >= plen)
> +             return true;
> +
> +     /* Page entirely beyond the end of the file */
> +     if (pos - offset >= i_size)
> +             goto zero_out;
> +
> +     /* Write that covers from the start of the folio to EOF or beyond */
> +     if (offset == 0 && (pos + len) >= i_size)
> +             goto zero_out;
> +
> +     return false;
> +zero_out:
> +     zero_user_segments(&folio->page, 0, offset, offset + len, len);
> +     return true;
> +}
> +
> +/**
> + * netfs_write_begin - Helper to prepare for writing
> + * @file: The file to read from
> + * @mapping: The mapping to read from
> + * @pos: File position at which the write will begin
> + * @len: The length of the write (may extend beyond the end of the folio 
> chosen)
> + * @aop_flags: AOP_* flags
> + * @_folio: Where to put the resultant folio
> + * @_fsdata: Place for the netfs to store a cookie
> + *
> + * Pre-read data for a write-begin request by drawing data from the cache if
> + * possible, or the netfs if not.  Space beyond the EOF is zero-filled.
> + * Multiple I/O requests from different sources will get munged together.  If
> + * necessary, the readahead window can be expanded in either direction to a
> + * more convenient alighment for RPC efficiency or to make storage in the 
> cache
> + * feasible.
> + *
> + * The calling netfs must provide a table of operations, only one of which,
> + * issue_op, is mandatory.
> + *
> + * The check_write_begin() operation can be provided to check for and flush
> + * conflicting writes once the folio is grabbed and locked.  It is passed a
> + * pointer to the fsdata cookie that gets returned to the VM to be passed to
> + * write_end.  It is permitted to sleep.  It should return 0 if the request
> + * should go ahead; unlock the folio and return -EAGAIN to cause the folio to
> + * be regot; or return an error.
> + *
> + * The calling netfs must initialise a netfs context contiguous to the vfs
> + * inode before calling this.
> + *
> + * This is usable whether or not caching is enabled.
> + */
> +int netfs_write_begin(struct file *file, struct address_space *mapping,
> +                   loff_t pos, unsigned int len, unsigned int aop_flags,
> +                   struct folio **_folio, void **_fsdata)
> +{
> +     struct netfs_io_request *rreq;
> +     struct netfs_i_context *ctx = netfs_i_context(file_inode(file ));
> +     struct folio *folio;
> +     unsigned int fgp_flags;
> +     pgoff_t index = pos >> PAGE_SHIFT;
> +     int ret;
> +
> +     DEFINE_READAHEAD(ractl, file, NULL, mapping, index);
> +
> +retry:
> +     fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE;
> +     if (aop_flags & AOP_FLAG_NOFS)
> +             fgp_flags |= FGP_NOFS;
> +     folio = __filemap_get_folio(mapping, index, fgp_flags,
> +                                 mapping_gfp_mask(mapping));
> +     if (!folio)
> +             return -ENOMEM;
> +
> +     if (ctx->ops->check_write_begin) {
> +             /* Allow the netfs (eg. ceph) to flush conflicts. */
> +             ret = ctx->ops->check_write_begin(file, pos, len, folio, 
> _fsdata);
> +             if (ret < 0) {
> +                     trace_netfs_failure(NULL, NULL, ret, 
> netfs_fail_check_write_begin);
> +                     if (ret == -EAGAIN)
> +                             goto retry;
> +                     goto error;
> +             }
> +     }
> +
> +     if (folio_test_uptodate(folio))
> +             goto have_folio;
> +
> +     /* If the page is beyond the EOF, we want to clear it - unless it's
> +      * within the cache granule containing the EOF, in which case we need
> +      * to preload the granule.
> +      */
> +     if (!netfs_is_cache_enabled(ctx) &&
> +         netfs_skip_folio_read(folio, pos, len, false)) {
> +             netfs_stat(&netfs_n_rh_write_zskip);
> +             goto have_folio_no_wait;
> +     }
> +
> +     rreq = netfs_alloc_request(mapping, file,
> +                                folio_file_pos(folio), folio_size(folio),
> +                                NETFS_READ_FOR_WRITE);
> +     if (IS_ERR(rreq)) {
> +             ret = PTR_ERR(rreq);
> +             goto error;
> +     }
> +     rreq->no_unlock_folio   = folio_index(folio);
> +     __set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);
> +
> +     if (ctx->ops->begin_cache_operation) {
> +             ret = ctx->ops->begin_cache_operation(rreq);
> +             if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
> +                     goto error_put;
> +     }
> +
> +     netfs_stat(&netfs_n_rh_write_begin);
> +     trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin);
> +
> +     /* Expand the request to meet caching requirements and download
> +      * preferences.
> +      */
> +     ractl._nr_pages = folio_nr_pages(folio);
> +     netfs_rreq_expand(rreq, &ractl);
> +
> +     /* We hold the folio locks, so we can drop the references */
> +     folio_get(folio);
> +     while (readahead_folio(&ractl))
> +             ;
> +
> +     ret = netfs_begin_read(rreq, true);
> +     if (ret < 0)
> +             goto error;
> +
> +have_folio:
> +     ret = folio_wait_fscache_killable(folio);
> +     if (ret < 0)
> +             goto error;
> +have_folio_no_wait:
> +     *_folio = folio;
> +     _leave(" = 0");
> +     return 0;
> +
> +error_put:
> +     netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
> +error:
> +     folio_unlock(folio);
> +     folio_put(folio);
> +     _leave(" = %d", ret);
> +     return ret;
> +}
> +EXPORT_SYMBOL(netfs_write_begin);
> diff --git a/fs/netfs/io.c b/fs/netfs/io.c
> index 058a534ba917..1fe9706c58a5 100644
> --- a/fs/netfs/io.c
> +++ b/fs/netfs/io.c
> @@ -246,91 +246,6 @@ static void netfs_rreq_write_to_cache(struct netfs_io_request *rreq)
>               BUG();
>  }
>  
> -/*
> - * Unlock the folios in a read operation.  We need to set PG_fscache on any
> - * folios we're going to write back before we unlock them.
> - */
> -void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
> -{
> -     struct netfs_io_subrequest *subreq;
> -     struct folio *folio;
> -     unsigned int iopos, account = 0;
> -     pgoff_t start_page = rreq->start / PAGE_SIZE;
> -     pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1;
> -     bool subreq_failed = false;
> -
> -     XA_STATE(xas, &rreq->mapping->i_pages, start_page);
> -
> -     if (test_bit(NETFS_RREQ_FAILED, &rreq->flags)) {
> -             __clear_bit(NETFS_RREQ_COPY_TO_CACHE, &rreq->flags);
> -             list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
> -                     __clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
> -             }
> -     }
> -
> -     /* Walk through the pagecache and the I/O request lists simultaneously.
> -      * We may have a mixture of cached and uncached sections and we only
> -      * really want to write out the uncached sections.  This is slightly
> -      * complicated by the possibility that we might have huge pages with a
> -      * mixture inside.
> -      */
> -     subreq = list_first_entry(&rreq->subrequests,
> -                               struct netfs_io_subrequest, rreq_link);
> -     iopos = 0;
> -     subreq_failed = (subreq->error < 0);
> -
> -     trace_netfs_rreq(rreq, netfs_rreq_trace_unlock);
> -
> -     rcu_read_lock();
> -     xas_for_each(&xas, folio, last_page) {
> -             unsigned int pgpos = (folio_index(folio) - start_page) * 
> PAGE_SIZE;
> -             unsigned int pgend = pgpos + folio_size(folio);
> -             bool pg_failed = false;
> -
> -             for (;;) {
> -                     if (!subreq) {
> -                             pg_failed = true;
> -                             break;
> -                     }
> -                     if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags))
> -                             folio_start_fscache(folio);
> -                     pg_failed |= subreq_failed;
> -                     if (pgend < iopos + subreq->len)
> -                             break;
> -
> -                     account += subreq->transferred;
> -                     iopos += subreq->len;
> -                     if (!list_is_last(&subreq->rreq_link, 
> &rreq->subrequests)) {
> -                             subreq = list_next_entry(subreq, rreq_link);
> -                             subreq_failed = (subreq->error < 0);
> -                     } else {
> -                             subreq = NULL;
> -                             subreq_failed = false;
> -                     }
> -                     if (pgend == iopos)
> -                             break;
> -             }
> -
> -             if (!pg_failed) {
> -                     flush_dcache_folio(folio);
> -                     folio_mark_uptodate(folio);
> -             }
> -
> -             if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
> -                     if (folio_index(folio) == rreq->no_unlock_folio &&
> -                         test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags))
> -                             _debug("no unlock");
> -                     else
> -                             folio_unlock(folio);
> -             }
> -     }
> -     rcu_read_unlock();
> -
> -     task_io_account_read(account);
> -     if (rreq->netfs_ops->done)
> -             rreq->netfs_ops->done(rreq);
> -}
> -
>  /*
>   * Handle a short read.
>   */
> @@ -750,336 +665,3 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync)
>       }
>       return ret;
>  }
> -
> -static void netfs_cache_expand_readahead(struct netfs_io_request *rreq,
> -                                      loff_t *_start, size_t *_len, loff_t 
> i_size)
> -{
> -     struct netfs_cache_resources *cres = &rreq->cache_resources;
> -
> -     if (cres->ops && cres->ops->expand_readahead)
> -             cres->ops->expand_readahead(cres, _start, _len, i_size);
> -}
> -
> -static void netfs_rreq_expand(struct netfs_io_request *rreq,
> -                           struct readahead_control *ractl)
> -{
> -     /* Give the cache a chance to change the request parameters.  The
> -      * resultant request must contain the original region.
> -      */
> -     netfs_cache_expand_readahead(rreq, &rreq->start, &rreq->len, 
> rreq->i_size);
> -
> -     /* Give the netfs a chance to change the request parameters.  The
> -      * resultant request must contain the original region.
> -      */
> -     if (rreq->netfs_ops->expand_readahead)
> -             rreq->netfs_ops->expand_readahead(rreq);
> -
> -     /* Expand the request if the cache wants it to start earlier.  Note
> -      * that the expansion may get further extended if the VM wishes to
> -      * insert THPs and the preferred start and/or end wind up in the middle
> -      * of THPs.
> -      *
> -      * If this is the case, however, the THP size should be an integer
> -      * multiple of the cache granule size, so we get a whole number of
> -      * granules to deal with.
> -      */
> -     if (rreq->start  != readahead_pos(ractl) ||
> -         rreq->len != readahead_length(ractl)) {
> -             readahead_expand(ractl, rreq->start, rreq->len);
> -             rreq->start  = readahead_pos(ractl);
> -             rreq->len = readahead_length(ractl);
> -
> -             trace_netfs_read(rreq, readahead_pos(ractl), 
> readahead_length(ractl),
> -                              netfs_read_trace_expanded);
> -     }
> -}
> -
> -/**
> - * netfs_readahead - Helper to manage a read request
> - * @ractl: The description of the readahead request
> - *
> - * Fulfil a readahead request by drawing data from the cache if possible, or
> - * the netfs if not.  Space beyond the EOF is zero-filled.  Multiple I/O
> - * requests from different sources will get munged together.  If necessary, 
> the
> - * readahead window can be expanded in either direction to a more convenient
> - * alighment for RPC efficiency or to make storage in the cache feasible.
> - *
> - * The calling netfs must initialise a netfs context contiguous to the vfs
> - * inode before calling this.
> - *
> - * This is usable whether or not caching is enabled.
> - */
> -void netfs_readahead(struct readahead_control *ractl)
> -{
> -     struct netfs_io_request *rreq;
> -     struct netfs_i_context *ctx = netfs_i_context(ractl->mapping->host);
> -     int ret;
> -
> -     _enter("%lx,%x", readahead_index(ractl), readahead_count(ractl));
> -
> -     if (readahead_count(ractl) == 0)
> -             return;
> -
> -     rreq = netfs_alloc_request(ractl->mapping, ractl->file,
> -                                readahead_pos(ractl),
> -                                readahead_length(ractl),
> -                                NETFS_READAHEAD);
> -     if (IS_ERR(rreq))
> -             return;
> -
> -     if (ctx->ops->begin_cache_operation) {
> -             ret = ctx->ops->begin_cache_operation(rreq);
> -             if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
> -                     goto cleanup_free;
> -     }
> -
> -     netfs_stat(&netfs_n_rh_readahead);
> -     trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
> -                      netfs_read_trace_readahead);
> -
> -     netfs_rreq_expand(rreq, ractl);
> -
> -     /* Drop the refs on the folios here rather than in the cache or
> -      * filesystem.  The locks will be dropped in netfs_rreq_unlock().
> -      */
> -     while (readahead_folio(ractl))
> -             ;
> -
> -     netfs_begin_read(rreq, false);
> -     return;
> -
> -cleanup_free:
> -     netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
> -     return;
> -}
> -EXPORT_SYMBOL(netfs_readahead);
> -
> -/**
> - * netfs_readpage - Helper to manage a readpage request
> - * @file: The file to read from
> - * @subpage: A subpage of the folio to read
> - *
> - * Fulfil a readpage request by drawing data from the cache if possible, or 
> the
> - * netfs if not.  Space beyond the EOF is zero-filled.  Multiple I/O requests
> - * from different sources will get munged together.
> - *
> - * The calling netfs must initialise a netfs context contiguous to the vfs
> - * inode before calling this.
> - *
> - * This is usable whether or not caching is enabled.
> - */
> -int netfs_readpage(struct file *file, struct page *subpage)
> -{
> -     struct folio *folio = page_folio(subpage);
> -     struct address_space *mapping = folio->mapping;
> -     struct netfs_io_request *rreq;
> -     struct netfs_i_context *ctx = netfs_i_context(mapping->host);
> -     int ret;
> -
> -     _enter("%lx", folio_index(folio));
> -
> -     rreq = netfs_alloc_request(mapping, file,
> -                                folio_file_pos(folio), folio_size(folio),
> -                                NETFS_READPAGE);
> -     if (IS_ERR(rreq)) {
> -             ret = PTR_ERR(rreq);
> -             goto alloc_error;
> -     }
> -
> -     if (ctx->ops->begin_cache_operation) {
> -             ret = ctx->ops->begin_cache_operation(rreq);
> -             if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
> -                     goto discard;
> -     }
> -
> -     netfs_stat(&netfs_n_rh_readpage);
> -     trace_netfs_read(rreq, rreq->start, rreq->len, 
> netfs_read_trace_readpage);
> -     return netfs_begin_read(rreq, true);
> -
> -discard:
> -     netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
> -alloc_error:
> -     folio_unlock(folio);
> -     return ret;
> -}
> -EXPORT_SYMBOL(netfs_readpage);
> -
> -/*
> - * Prepare a folio for writing without reading first
> - * @folio: The folio being prepared
> - * @pos: starting position for the write
> - * @len: length of write
> - * @always_fill: T if the folio should always be completely filled/cleared
> - *
> - * In some cases, write_begin doesn't need to read at all:
> - * - full folio write
> - * - write that lies in a folio that is completely beyond EOF
> - * - write that covers the folio from start to EOF or beyond it
> - *
> - * If any of these criteria are met, then zero out the unwritten parts
> - * of the folio and return true. Otherwise, return false.
> - */
> -static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t 
> len,
> -                              bool always_fill)
> -{
> -     struct inode *inode = folio_inode(folio);
> -     loff_t i_size = i_size_read(inode);
> -     size_t offset = offset_in_folio(folio, pos);
> -     size_t plen = folio_size(folio);
> -
> -     if (unlikely(always_fill)) {
> -             if (pos - offset + len <= i_size)
> -                     return false; /* Page entirely before EOF */
> -             zero_user_segment(&folio->page, 0, plen);
> -             folio_mark_uptodate(folio);
> -             return true;
> -     }
> -
> -     /* Full folio write */
> -     if (offset == 0 && len >= plen)
> -             return true;
> -
> -     /* Page entirely beyond the end of the file */
> -     if (pos - offset >= i_size)
> -             goto zero_out;
> -
> -     /* Write that covers from the start of the folio to EOF or beyond */
> -     if (offset == 0 && (pos + len) >= i_size)
> -             goto zero_out;
> -
> -     return false;
> -zero_out:
> -     zero_user_segments(&folio->page, 0, offset, offset + len, len);
> -     return true;
> -}
> -
> -/**
> - * netfs_write_begin - Helper to prepare for writing
> - * @file: The file to read from
> - * @mapping: The mapping to read from
> - * @pos: File position at which the write will begin
> - * @len: The length of the write (may extend beyond the end of the folio 
> chosen)
> - * @aop_flags: AOP_* flags
> - * @_folio: Where to put the resultant folio
> - * @_fsdata: Place for the netfs to store a cookie
> - *
> - * Pre-read data for a write-begin request by drawing data from the cache if
> - * possible, or the netfs if not.  Space beyond the EOF is zero-filled.
> - * Multiple I/O requests from different sources will get munged together.  If
> - * necessary, the readahead window can be expanded in either direction to a
> - * more convenient alighment for RPC efficiency or to make storage in the 
> cache
> - * feasible.
> - *
> - * The calling netfs must provide a table of operations, only one of which,
> - * issue_op, is mandatory.
> - *
> - * The check_write_begin() operation can be provided to check for and flush
> - * conflicting writes once the folio is grabbed and locked.  It is passed a
> - * pointer to the fsdata cookie that gets returned to the VM to be passed to
> - * write_end.  It is permitted to sleep.  It should return 0 if the request
> - * should go ahead; unlock the folio and return -EAGAIN to cause the folio to
> - * be regot; or return an error.
> - *
> - * The calling netfs must initialise a netfs context contiguous to the vfs
> - * inode before calling this.
> - *
> - * This is usable whether or not caching is enabled.
> - */
> -int netfs_write_begin(struct file *file, struct address_space *mapping,
> -                   loff_t pos, unsigned int len, unsigned int aop_flags,
> -                   struct folio **_folio, void **_fsdata)
> -{
> -     struct netfs_io_request *rreq;
> -     struct netfs_i_context *ctx = netfs_i_context(file_inode(file ));
> -     struct folio *folio;
> -     unsigned int fgp_flags;
> -     pgoff_t index = pos >> PAGE_SHIFT;
> -     int ret;
> -
> -     DEFINE_READAHEAD(ractl, file, NULL, mapping, index);
> -
> -retry:
> -     fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE;
> -     if (aop_flags & AOP_FLAG_NOFS)
> -             fgp_flags |= FGP_NOFS;
> -     folio = __filemap_get_folio(mapping, index, fgp_flags,
> -                                 mapping_gfp_mask(mapping));
> -     if (!folio)
> -             return -ENOMEM;
> -
> -     if (ctx->ops->check_write_begin) {
> -             /* Allow the netfs (eg. ceph) to flush conflicts. */
> -             ret = ctx->ops->check_write_begin(file, pos, len, folio, 
> _fsdata);
> -             if (ret < 0) {
> -                     trace_netfs_failure(NULL, NULL, ret, 
> netfs_fail_check_write_begin);
> -                     if (ret == -EAGAIN)
> -                             goto retry;
> -                     goto error;
> -             }
> -     }
> -
> -     if (folio_test_uptodate(folio))
> -             goto have_folio;
> -
> -     /* If the page is beyond the EOF, we want to clear it - unless it's
> -      * within the cache granule containing the EOF, in which case we need
> -      * to preload the granule.
> -      */
> -     if (!netfs_is_cache_enabled(ctx) &&
> -         netfs_skip_folio_read(folio, pos, len, false)) {
> -             netfs_stat(&netfs_n_rh_write_zskip);
> -             goto have_folio_no_wait;
> -     }
> -
> -     rreq = netfs_alloc_request(mapping, file,
> -                                folio_file_pos(folio), folio_size(folio),
> -                                NETFS_READ_FOR_WRITE);
> -     if (IS_ERR(rreq)) {
> -             ret = PTR_ERR(rreq);
> -             goto error;
> -     }
> -     rreq->no_unlock_folio   = folio_index(folio);
> -     __set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);
> -
> -     if (ctx->ops->begin_cache_operation) {
> -             ret = ctx->ops->begin_cache_operation(rreq);
> -             if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
> -                     goto error_put;
> -     }
> -
> -     netfs_stat(&netfs_n_rh_write_begin);
> -     trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin);
> -
> -     /* Expand the request to meet caching requirements and download
> -      * preferences.
> -      */
> -     ractl._nr_pages = folio_nr_pages(folio);
> -     netfs_rreq_expand(rreq, &ractl);
> -
> -     /* We hold the folio locks, so we can drop the references */
> -     folio_get(folio);
> -     while (readahead_folio(&ractl))
> -             ;
> -
> -     ret = netfs_begin_read(rreq, true);
> -     if (ret < 0)
> -             goto error;
> -
> -have_folio:
> -     ret = folio_wait_fscache_killable(folio);
> -     if (ret < 0)
> -             goto error;
> -have_folio_no_wait:
> -     *_folio = folio;
> -     _leave(" = 0");
> -     return 0;
> -
> -error_put:
> -     netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
> -error:
> -     folio_unlock(folio);
> -     folio_put(folio);
> -     _leave(" = %d", ret);
> -     return ret;
> -}
> -EXPORT_SYMBOL(netfs_write_begin);
> 
> 

Patch itself is fine though.

Reviewed-by: Jeff Layton <jlay...@kernel.org>

--
Linux-cachefs mailing list
Linux-cachefs@redhat.com
https://listman.redhat.com/mailman/listinfo/linux-cachefs

Reply via email to