Hi Kuai,

On Thu, Jul 30, 2020 at 09:19:01AM +0800, Yu Kuai wrote:
> commit 9dc55f1389f9 ("iomap: add support for sub-pagesize buffered I/O
> without buffer heads") replaced the per-block structure buffer_head with
> the per-page structure iomap_page. However, iomap_page can't track the
> dirty state of sub-page blocks, which causes a performance issue: sub-page
> blocks are written back even if they are not dirty.
> 
> For example, if block size is 4k and page size is 64k:
> 
> dd if=/dev/zero of=testfile bs=4k count=16 oflag=sync
> 
> With the buffer_head implementation, the above dd command writes back 4k
> in each round. With the iomap_page implementation, however, the range
> written back in each round spans from the start of the page to the end
> offset just written.
> 
> Thus, add support for tracking the dirty state of sub-page blocks in
> iomap_page.

AFAIK, the original discussion thread also focused on the numbers, so
it'd be better to show some numbers for this with a large PAGE_SIZE
under some workloads as well.

https://lore.kernel.org/r/[email protected]
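
For instance, something like the following on a 64k PAGE_SIZE machine
could make the comparison concrete (the file name and the fio job
parameters below are just placeholders, not a fixed benchmark):

    dd if=/dev/zero of=testfile bs=4k count=16 oflag=sync
    fio --name=frag --filename=testfile --rw=randwrite --bs=4k \
        --size=64k --ioengine=psync --fdatasync=1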

e.g. my guess is that if the dirty blocks in the page are highly
fragmented, it might be better to write back the whole page in a single
IO rather than as individual blocks.
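
Just to illustrate the idea (untested, and iomap_prefer_full_page_writeback()
plus the run threshold are made up here, not anything in the tree):

	/*
	 * Count discontiguous runs of dirty blocks in the page; if the
	 * dirty state is fragmented enough, writing back the whole page
	 * in one bio may beat issuing one small bio per run.
	 */
	static bool
	iomap_prefer_full_page_writeback(struct iomap_page *iop,
			unsigned int blocks_per_page)
	{
		unsigned int i, runs = 0;
		bool in_run = false;

		for (i = 0; i < blocks_per_page; i++) {
			if (test_bit(i, iop->dirty)) {
				if (!in_run)
					runs++;
				in_run = true;
			} else {
				in_run = false;
			}
		}
		/* the threshold is arbitrary; real numbers would have to tune it */
		return runs > 4;
	}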

At a very quick glance, the approach looks good to me.

Thanks,
Gao Xiang

> 
> Signed-off-by: Yu Kuai <[email protected]>
> ---
>  fs/iomap/buffered-io.c | 51 +++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 50 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> index bcfc288dba3f..ac2676146b98 100644
> --- a/fs/iomap/buffered-io.c
> +++ b/fs/iomap/buffered-io.c
> @@ -29,7 +29,9 @@ struct iomap_page {
>       atomic_t                read_count;
>       atomic_t                write_count;
>       spinlock_t              uptodate_lock;
> +     spinlock_t              dirty_lock;
>       DECLARE_BITMAP(uptodate, PAGE_SIZE / 512);
> +     DECLARE_BITMAP(dirty, PAGE_SIZE / 512);
>  };
>  
>  static inline struct iomap_page *to_iomap_page(struct page *page)
> @@ -53,7 +55,9 @@ iomap_page_create(struct inode *inode, struct page *page)
>       atomic_set(&iop->read_count, 0);
>       atomic_set(&iop->write_count, 0);
>       spin_lock_init(&iop->uptodate_lock);
> +     spin_lock_init(&iop->dirty_lock);
>       bitmap_zero(iop->uptodate, PAGE_SIZE / SECTOR_SIZE);
> +     bitmap_zero(iop->dirty, PAGE_SIZE / SECTOR_SIZE);
>  
>       /*
>        * migrate_page_move_mapping() assumes that pages with private data have
> @@ -135,6 +139,44 @@ iomap_adjust_read_range(struct inode *inode, struct iomap_page *iop,
>       *lenp = plen;
>  }
>  
> +static void
> +iomap_iop_set_or_clear_range_dirty(
> +     struct page *page,
> +     unsigned int off,
> +     unsigned int len,
> +     bool is_set)
> +{
> +     struct iomap_page *iop = to_iomap_page(page);
> +     struct inode *inode = page->mapping->host;
> +     unsigned int first = off >> inode->i_blkbits;
> +     unsigned int last = (off + len - 1) >> inode->i_blkbits;
> +     unsigned long flags;
> +     unsigned int i;
> +
> +     spin_lock_irqsave(&iop->dirty_lock, flags);
> +     for (i = first; i <= last; i++)
> +             if (is_set)
> +                     set_bit(i, iop->dirty);
> +             else
> +                     clear_bit(i, iop->dirty);
> +     spin_unlock_irqrestore(&iop->dirty_lock, flags);
> +}
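
Minor nit: since this already runs under dirty_lock, the per-bit atomic
ops aren't strictly needed; the loop could probably be collapsed into the
non-atomic range helpers, e.g.:

	spin_lock_irqsave(&iop->dirty_lock, flags);
	if (is_set)
		bitmap_set(iop->dirty, first, last - first + 1);
	else
		bitmap_clear(iop->dirty, first, last - first + 1);
	spin_unlock_irqrestore(&iop->dirty_lock, flags);
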
> +
> +static void
> +iomap_set_or_clear_range_dirty(
> +     struct page *page,
> +     unsigned int off,
> +     unsigned int len,
> +     bool is_set)
> +{
> +     if (PageError(page))
> +             return;
> +
> +     if (page_has_private(page))
> +             iomap_iop_set_or_clear_range_dirty(
> +                     page, off, len, is_set);
> +}
> +
>  static void
>  iomap_iop_set_range_uptodate(struct page *page, unsigned off, unsigned len)
>  {
> @@ -705,6 +747,8 @@ __iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
>       if (unlikely(copied < len && !PageUptodate(page)))
>               return 0;
>       iomap_set_range_uptodate(page, offset_in_page(pos), len);
> +     iomap_set_or_clear_range_dirty(
> +             page, offset_in_page(pos), len, true);
>       iomap_set_page_dirty(page);
>       return copied;
>  }
> @@ -1030,6 +1074,8 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
>               WARN_ON_ONCE(!PageUptodate(page));
>               iomap_page_create(inode, page);
>               set_page_dirty(page);
> +             iomap_set_or_clear_range_dirty(
> +                     page, offset_in_page(pos), length, true);
>       }
>  
>       return length;
> @@ -1386,7 +1432,8 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
>       for (i = 0, file_offset = page_offset(page);
>            i < (PAGE_SIZE >> inode->i_blkbits) && file_offset < end_offset;
>            i++, file_offset += len) {
> -             if (iop && !test_bit(i, iop->uptodate))
> +             if (iop && (!test_bit(i, iop->uptodate) ||
> +                 !test_bit(i, iop->dirty)))
>                       continue;
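
If I read this right, this is where the win for the dd case comes from:
on a 64k page with 4k blocks, only the block that was just written keeps
both bits set, so everything else hits the continue above and never
reaches ->map_blocks(). Roughly (mock state for one 4k sync write):

	/* 16 blocks per page; after writing block 5 with oflag=sync:  */
	/*   uptodate: blocks 0..5 set (written so far this run)       */
	/*   dirty:    block 5 only (earlier blocks cleaned by fsync)  */
	/* so only one 4k block is mapped and written back             */
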
>  
>               error = wpc->ops->map_blocks(wpc, inode, file_offset);
> @@ -1435,6 +1482,8 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
>                */
>               set_page_writeback_keepwrite(page);
>       } else {
> +             iomap_set_or_clear_range_dirty(
> +                     page, 0, end_offset - page_offset(page) + 1, false);
>               clear_page_dirty_for_io(page);
>               set_page_writeback(page);
>       }
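
One question on the range here: the loop above treats end_offset as
exclusive (file_offset < end_offset), so shouldn't the length be
end_offset - page_offset(page), without the + 1? i.e. something like:

	iomap_set_or_clear_range_dirty(
		page, 0, end_offset - page_offset(page), false);

Otherwise a block-aligned end_offset would clear one bit past the last
block. (I may well be misreading how end_offset is computed in the
caller, though.)
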
> -- 
> 2.25.4
> 
