On Thursday 09 December 2010 10:30:14 Zhong, Xin wrote:
> This problem is found in meego testing:
> http://bugs.meego.com/show_bug.cgi?id=6672
> A file in btrfs is mmaped and the mmaped buffer is passed to pwrite to
> write to the same page of the same file. In btrfs_file_aio_write(), the
> pages is locked by prepare_pages(). So when btrfs_copy_from_user() is
> called, page fault happens and the same page needs to be locked again in
> filemap_fault(). The fix is to move iov_iter_fault_in_readable() before
> prepage_pages() to make page fault happen before pages are locked. And
> also disable page fault in critical region in btrfs_copy_from_user().
> 
> Reviewed-by: Yan, Zheng<zheng.z....@intel.com>
> Signed-off-by: Zhong, Xin <xin.zh...@intel.com>
> ---
>  fs/btrfs/file.c |   92
> ++++++++++++++++++++++++++++++++++++------------------- 1 files changed,
> 60 insertions(+), 32 deletions(-)
> 
> diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
> index c1faded..66836d8 100644
> --- a/fs/btrfs/file.c
> +++ b/fs/btrfs/file.c
> @@ -48,30 +48,34 @@ static noinline int btrfs_copy_from_user(loff_t pos,
> int num_pages, struct page **prepared_pages,
>                                        struct iov_iter *i)
>  {
> -     size_t copied;
> +     size_t copied = 0;
>       int pg = 0;
>       int offset = pos & (PAGE_CACHE_SIZE - 1);
> +     int total_copied = 0;
> 
>       while (write_bytes > 0) {
>               size_t count = min_t(size_t,
>                                    PAGE_CACHE_SIZE - offset, write_bytes);
>               struct page *page = prepared_pages[pg];
> -again:
> -             if (unlikely(iov_iter_fault_in_readable(i, count)))
> -                     return -EFAULT;
> -
> -             /* Copy data from userspace to the current page */
> -             copied = iov_iter_copy_from_user(page, i, offset, count);
> +             /*
> +              * Copy data from userspace to the current page
> +              *
> +              * Disable pagefault to avoid recursive lock since
> +              * the pages are already locked
> +              */
> +             pagefault_disable();
> +             copied = iov_iter_copy_from_user_atomic(page, i, offset, count);
> +             pagefault_enable();
> 
>               /* Flush processor's dcache for this page */
>               flush_dcache_page(page);
>               iov_iter_advance(i, copied);
>               write_bytes -= copied;
> +             total_copied += copied;
> 
> +             /* Return to btrfs_file_aio_write to fault page */
>               if (unlikely(copied == 0)) {
> -                     count = min_t(size_t, PAGE_CACHE_SIZE - offset,
> -                                   iov_iter_single_seg_count(i));
> -                     goto again;
> +                     break;
>               }
> 
>               if (unlikely(copied < PAGE_CACHE_SIZE - offset)) {
> @@ -81,7 +85,7 @@ again:
>                       offset = 0;
>               }
>       }
> -     return 0;
> +     return total_copied;
>  }
> 
>  /*
> @@ -854,6 +858,8 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
>       unsigned long last_index;
>       int will_write;
>       int buffered = 0;
> +     int copied = 0;
> +     int dirty_pages = 0;
> 
>       will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
>                     (file->f_flags & O_DIRECT));
> @@ -970,7 +976,17 @@ static ssize_t btrfs_file_aio_write(struct kiocb
> *iocb, WARN_ON(num_pages > nrptrs);
>               memset(pages, 0, sizeof(struct page *) * nrptrs);
> 
> -             ret = btrfs_delalloc_reserve_space(inode, write_bytes);
> +             /*
> +              * Fault pages before locking them in prepare_pages
> +              * to avoid recursive lock
> +              */
> +             if (unlikely(iov_iter_fault_in_readable(&i, write_bytes))) {
> +                     ret = -EFAULT;
> +                     goto out;
> +             }
> +
> +             ret = btrfs_delalloc_reserve_space(inode,
> +                                     num_pages << PAGE_CACHE_SHIFT);
>               if (ret)
>                       goto out;
> 
> @@ -978,37 +994,49 @@ static ssize_t btrfs_file_aio_write(struct kiocb
> *iocb, pos, first_index, last_index,
>                                   write_bytes);
>               if (ret) {
> -                     btrfs_delalloc_release_space(inode, write_bytes);
> +                     btrfs_delalloc_release_space(inode,
> +                                     num_pages << PAGE_CACHE_SHIFT);
>                       goto out;
>               }
> 
> -             ret = btrfs_copy_from_user(pos, num_pages,
> +             copied = btrfs_copy_from_user(pos, num_pages,
>                                          write_bytes, pages, &i);
> -             if (ret == 0) {
> +             dirty_pages = (copied + PAGE_CACHE_SIZE - 1) >>
> +                                     PAGE_CACHE_SHIFT;
> +
> +             if (num_pages > dirty_pages) {
> +                     if (copied > 0)
> +                             atomic_inc(
> +                                     &BTRFS_I(inode)->outstanding_extents);
> +                     btrfs_delalloc_release_space(inode,
> +                                     (num_pages - dirty_pages) <<
> +                                     PAGE_CACHE_SHIFT);
> +             }
> +
> +             if (copied > 0) {
>                       dirty_and_release_pages(NULL, root, file, pages,
> -                                             num_pages, pos, write_bytes);
> +                                             dirty_pages, pos, copied);
>               }
> 
>               btrfs_drop_pages(pages, num_pages);
> -             if (ret) {
> -                     btrfs_delalloc_release_space(inode, write_bytes);
> -                     goto out;
> -             }
> 
> -             if (will_write) {
> -                     filemap_fdatawrite_range(inode->i_mapping, pos,
> -                                              pos + write_bytes - 1);
> -             } else {
> -                     balance_dirty_pages_ratelimited_nr(inode->i_mapping,
> -                                                        num_pages);
> -                     if (num_pages <
> -                         (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
> -                             btrfs_btree_balance_dirty(root, 1);
> -                     btrfs_throttle(root);
> +             if (copied > 0) {
> +                     if (will_write) {
> +                             filemap_fdatawrite_range(inode->i_mapping, pos,
> +                                                      pos + copied - 1);
> +                     } else {
> +                             balance_dirty_pages_ratelimited_nr(
> +                                                     inode->i_mapping,
> +                                                     dirty_pages);
> +                             if (dirty_pages <
> +                             (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
> +                                     btrfs_btree_balance_dirty(root, 1);
> +                             btrfs_throttle(root);
> +                     }
>               }
> 
> -             pos += write_bytes;
> -             num_written += write_bytes;
> +             pos += copied;
> +             num_written += copied;
> 
>               cond_resched();
>       }

This patch breaks one of my Gentoo boxes. When I try to install/update via 
emerge, some packages hang. It seems that it's always a "svn info" process 
that is stuck in kernel eating 100% CPU. I don't know how svn is involved 
here, but reverting this patch makes the system work again. I'll try to get a 
simple testcase.

regards,
  Johannes
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to