tor 2011-01-27 klockan 14:09 +0100 skrev Johannes Hirte: > On Thursday 09 December 2010 10:30:14 Zhong, Xin wrote: > > This problem is found in meego testing: > > http://bugs.meego.com/show_bug.cgi?id=6672 > > A file in btrfs is mmaped and the mmaped buffer is passed to pwrite to > > write to the same page of the same file. In btrfs_file_aio_write(), the > > pages is locked by prepare_pages(). So when btrfs_copy_from_user() is > > called, page fault happens and the same page needs to be locked again in > > filemap_fault(). The fix is to move iov_iter_fault_in_readable() before > > prepage_pages() to make page fault happen before pages are locked. And > > also disable page fault in critical region in btrfs_copy_from_user(). > > > > Reviewed-by: Yan, Zheng<zheng.z....@intel.com> > > Signed-off-by: Zhong, Xin <xin.zh...@intel.com> > > --- > > fs/btrfs/file.c | 92 > > ++++++++++++++++++++++++++++++++++++------------------- 1 files changed, > > 60 insertions(+), 32 deletions(-) > > > > diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c > > index c1faded..66836d8 100644 > > --- a/fs/btrfs/file.c > > +++ b/fs/btrfs/file.c > > @@ -48,30 +48,34 @@ static noinline int btrfs_copy_from_user(loff_t pos, > > int num_pages, struct page **prepared_pages, > > struct iov_iter *i) > > { > > - size_t copied; > > + size_t copied = 0; > > int pg = 0; > > int offset = pos & (PAGE_CACHE_SIZE - 1); > > + int total_copied = 0; > > > > while (write_bytes > 0) { > > size_t count = min_t(size_t, > > PAGE_CACHE_SIZE - offset, write_bytes); > > struct page *page = prepared_pages[pg]; > > -again: > > - if (unlikely(iov_iter_fault_in_readable(i, count))) > > - return -EFAULT; > > - > > - /* Copy data from userspace to the current page */ > > - copied = iov_iter_copy_from_user(page, i, offset, count); > > + /* > > + * Copy data from userspace to the current page > > + * > > + * Disable pagefault to avoid recursive lock since > > + * the pages are already locked > > + */ > > + pagefault_disable(); > > + copied = iov_iter_copy_from_user_atomic(page, i, offset, count); > > + pagefault_enable(); > > > > /* Flush processor's dcache for this page */ > > flush_dcache_page(page); > > iov_iter_advance(i, copied); > > write_bytes -= copied; > > + total_copied += copied; > > > > + /* Return to btrfs_file_aio_write to fault page */ > > if (unlikely(copied == 0)) { > > - count = min_t(size_t, PAGE_CACHE_SIZE - offset, > > - iov_iter_single_seg_count(i)); > > - goto again; > > + break; > > } > > > > if (unlikely(copied < PAGE_CACHE_SIZE - offset)) { > > @@ -81,7 +85,7 @@ again: > > offset = 0; > > } > > } > > - return 0; > > + return total_copied; > > } > > > > /* > > @@ -854,6 +858,8 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, > > unsigned long last_index; > > int will_write; > > int buffered = 0; > > + int copied = 0; > > + int dirty_pages = 0; > > > > will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) || > > (file->f_flags & O_DIRECT)); > > @@ -970,7 +976,17 @@ static ssize_t btrfs_file_aio_write(struct kiocb > > *iocb, WARN_ON(num_pages > nrptrs); > > memset(pages, 0, sizeof(struct page *) * nrptrs); > > > > - ret = btrfs_delalloc_reserve_space(inode, write_bytes); > > + /* > > + * Fault pages before locking them in prepare_pages > > + * to avoid recursive lock > > + */ > > + if (unlikely(iov_iter_fault_in_readable(&i, write_bytes))) { > > + ret = -EFAULT; > > + goto out; > > + } > > + > > + ret = btrfs_delalloc_reserve_space(inode, > > + num_pages << PAGE_CACHE_SHIFT); > > if (ret) > > goto out; > > > > @@ -978,37 +994,49 @@ static ssize_t btrfs_file_aio_write(struct kiocb > > *iocb, pos, first_index, last_index, > > write_bytes); > > if (ret) { > > - btrfs_delalloc_release_space(inode, write_bytes); > > + btrfs_delalloc_release_space(inode, > > + num_pages << PAGE_CACHE_SHIFT); > > goto out; > > } > > > > - ret = btrfs_copy_from_user(pos, num_pages, > > + copied = btrfs_copy_from_user(pos, num_pages, > > write_bytes, pages, &i); > > - if (ret == 0) { > > + dirty_pages = (copied + PAGE_CACHE_SIZE - 1) >> > > + PAGE_CACHE_SHIFT; > > + > > + if (num_pages > dirty_pages) { > > + if (copied > 0) > > + atomic_inc( > > + &BTRFS_I(inode)->outstanding_extents); > > + btrfs_delalloc_release_space(inode, > > + (num_pages - dirty_pages) << > > + PAGE_CACHE_SHIFT); > > + } > > + > > + if (copied > 0) { > > dirty_and_release_pages(NULL, root, file, pages, > > - num_pages, pos, write_bytes); > > + dirty_pages, pos, copied); > > } > > > > btrfs_drop_pages(pages, num_pages); > > - if (ret) { > > - btrfs_delalloc_release_space(inode, write_bytes); > > - goto out; > > - } > > > > - if (will_write) { > > - filemap_fdatawrite_range(inode->i_mapping, pos, > > - pos + write_bytes - 1); > > - } else { > > - balance_dirty_pages_ratelimited_nr(inode->i_mapping, > > - num_pages); > > - if (num_pages < > > - (root->leafsize >> PAGE_CACHE_SHIFT) + 1) > > - btrfs_btree_balance_dirty(root, 1); > > - btrfs_throttle(root); > > + if (copied > 0) { > > + if (will_write) { > > + filemap_fdatawrite_range(inode->i_mapping, pos, > > + pos + copied - 1); > > + } else { > > + balance_dirty_pages_ratelimited_nr( > > + inode->i_mapping, > > + dirty_pages); > > + if (dirty_pages < > > + (root->leafsize >> PAGE_CACHE_SHIFT) + 1) > > + btrfs_btree_balance_dirty(root, 1); > > + btrfs_throttle(root); > > + } > > } > > > > - pos += write_bytes; > > - num_written += write_bytes; > > + pos += copied; > > + num_written += copied; > > > > cond_resched(); > > } > > This patch breaks one of my Gentoo boxes. When I try to install/update via > emerge, some packages hang. It seems that it's always a "svn info" process > that is stuck in kernel eating 100% CPU. I don't know how svn is involved > here, but reverting this patch makes the system work again. I'll try to get a > simple testcase. > > regards, > Johannes
The same thing happens here. The only way to kill the process (svn info) is to restart the computer. Running vanilla 2.6.37 without patches. #strace emerge libgcrypt -- cut -- open("/var/tmp/portage/dev-libs/libgcrypt-1.4.6/temp/build.log", O_WRONLY|O_CREAT|O_APPEND|O_LARGEFILE, 0666) = 7 fstat64(7, {st_mode=S_IFREG|0660, st_size=416, ...}) = 0 mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb713a000 fstat64(7, {st_mode=S_IFREG|0660, st_size=416, ...}) = 0 _llseek(7, 416, [416], SEEK_SET) = 0 fstat64(7, {st_mode=S_IFREG|0660, st_size=416, ...}) = 0 stat64("/var/tmp/portage/dev-libs/libgcrypt-1.4.6/temp/build.log", {st_mode=S_IFREG|0660, st_size=416, ...}) = 0 dup(1) = 8 fcntl64(8, F_GETFL) = 0x2 (flags O_RDWR) fstat64(8, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 3), ...}) = 0 mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb7005000 _llseek(8, 0, 0xbff80cfc, SEEK_CUR) = -1 ESPIPE (Illegal seek) fstat64(8, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 3), ...}) = 0 stat64("/var/tmp/portage/dev-libs/libgcrypt-1.4.6/temp/environment", {st_mode=S_IFREG|0664, st_size=105970, ...}) = 0 clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID| SIGCHLD, child_tidptr=0xb7430728) = 11665 close(6) = 0 gettimeofday({1296162836, 126192}, NULL) = 0 poll([{fd=3, events=POLLIN|POLLERR|POLLHUP|POLLNVAL}, {fd=5, events=POLLIN|POLLERR|POLLHUP|POLLNVAL}], 2, 3000) = 1 ([{fd=5, revents=POLLIN}]) read(5, ">>> Preparing source in /var/tmp"..., 4096) = 91 read(5, 0xb713b000, 4096) = -1 EAGAIN (Resource temporarily unavailable) write(8, ">>> Preparing source in /var/tmp"..., 91>>> Preparing source in /var/tmp/portage/dev-libs/libgcrypt-1.4.6/work/libgcrypt-1.4.6 ... ) = 91 write(7, ">>> Preparing source in /var/tmp"..., 91) = 91 poll([{fd=3, events=POLLIN|POLLERR|POLLHUP|POLLNVAL}, {fd=5, events=POLLIN|POLLERR|POLLHUP|POLLNVAL}], 2, 3000) = 0 (Timeout) poll([{fd=3, events=POLLIN|POLLERR|POLLHUP|POLLNVAL}, {fd=5, events=POLLIN|POLLERR|POLLHUP|POLLNVAL}], 2, 3000) = 0 (Timeout) poll([{fd=3, events=POLLIN|POLLERR|POLLHUP|POLLNVAL}, {fd=5, events=POLLIN|POLLERR|POLLHUP|POLLNVAL}], 2, 3000) = 0 (Timeout) The last line is repeated every 3sec. // Maria -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html