On Sun, 2021-06-13 at 07:36 -0400, Jeff Layton wrote:
> It's not sufficient to skip reading when the pos is beyond the EOF.
> There may be data at the head of the page that we need to fill in
> before the write.
>
> Add a new helper function that corrects and clarifies the logic.
>
> Cc: <[email protected]> # v5.10+
> Cc: Matthew Wilcox <[email protected]>
> Fixes: 1cc1699070bd ("ceph: fold ceph_update_writeable_page into
> ceph_write_begin")
> Reported-by: Andrew W Elble <[email protected]>
> Signed-off-by: Jeff Layton <[email protected]>
> ---
> fs/ceph/addr.c | 63 +++++++++++++++++++++++++++++++++++++++-----------
> 1 file changed, 50 insertions(+), 13 deletions(-)
>
> This version just has a couple of future-proofing tweaks that Willy
> suggested.
>
> diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
> index 26e66436f005..b20a17cfec42 100644
> --- a/fs/ceph/addr.c
> +++ b/fs/ceph/addr.c
> @@ -1302,6 +1302,54 @@ ceph_find_incompatible(struct page *page)
> return NULL;
> }
>
> +/**
> + * prep_noread_page - prep a page for writing without reading first
> + * @page: page being prepared
> + * @pos: starting position for the write
> + * @len: length of write
> + *
> + * In some cases we don't need to read at all:
> + * - full page write
> + * - file is currently zero-length
> + * - write that lies in a page that is completely beyond EOF
> + * - write that covers the the page from start to EOF or beyond it
> + *
> + * If any of these criteria are met, then zero out the unwritten parts
> + * of the page and return true. Otherwise, return false.
> + */
> +static bool prep_noread_page(struct page *page, loff_t pos, size_t len)
> +{
> + struct inode *inode = page->mapping->host;
> + loff_t i_size = i_size_read(inode);
> + pgoff_t index = pos / PAGE_SIZE;
> + size_t offset = offset_in_page(pos);
> +
> + /* clamp length to end of the current page */
> + if (len > PAGE_SIZE)
> + len = PAGE_SIZE - offset;
Actually, I think this should be:
len = min(len, PAGE_SIZE - offset);
Otherwise, len could still go beyond the end of the page.
> +
> + /* full page write */
> + if (offset == 0 && len == PAGE_SIZE)
> + goto zero_out;
> +
> + /* zero-length file */
> + if (i_size == 0)
> + goto zero_out;
> +
> + /* position beyond last page in the file */
> + if (index > ((i_size - 1) / PAGE_SIZE))
> + goto zero_out;
> +
> + /* write that covers the the page from start to EOF or beyond it */
> + if (offset == 0 && (pos + len) >= i_size)
> + goto zero_out;
> +
> + return false;
> +zero_out:
> + zero_user_segments(page, 0, offset, offset + len, PAGE_SIZE);
> + return true;
> +}
> +
> /*
> * We are only allowed to write into/dirty the page if the page is
> * clean, or already dirty within the same snap context.
> @@ -1315,7 +1363,6 @@ static int ceph_write_begin(struct file *file, struct
> address_space *mapping,
> struct ceph_snap_context *snapc;
> struct page *page = NULL;
> pgoff_t index = pos >> PAGE_SHIFT;
> - int pos_in_page = pos & ~PAGE_MASK;
> int r = 0;
>
> dout("write_begin file %p inode %p page %p %d~%d\n", file, inode, page,
> (int)pos, (int)len);
> @@ -1350,19 +1397,9 @@ static int ceph_write_begin(struct file *file, struct
> address_space *mapping,
> break;
> }
>
> - /*
> - * In some cases we don't need to read at all:
> - * - full page write
> - * - write that lies completely beyond EOF
> - * - write that covers the the page from start to EOF or beyond
> it
> - */
> - if ((pos_in_page == 0 && len == PAGE_SIZE) ||
> - (pos >= i_size_read(inode)) ||
> - (pos_in_page == 0 && (pos + len) >= i_size_read(inode))) {
> - zero_user_segments(page, 0, pos_in_page,
> - pos_in_page + len, PAGE_SIZE);
> + /* No need to read in some cases */
> + if (prep_noread_page(page, pos, len))
> break;
> - }
>
> /*
> * We need to read it. If we get back -EINPROGRESS, then the
> page was
--
Jeff Layton <[email protected]>
--
Linux-cachefs mailing list
[email protected]
https://listman.redhat.com/mailman/listinfo/linux-cachefs