In order for AIO to work, we need to implement aio_write. This patch converts our btrfs_file_write to btrfs_aio_write. I've tested this with xfstests and nothing broke, and the AIO stuff magically started working. Thanks,
Signed-off-by: Josef Bacik <jo...@redhat.com> --- fs/btrfs/extent_io.c | 11 +++- fs/btrfs/file.c | 152 +++++++++++++++++++++++--------------------------- 2 files changed, 80 insertions(+), 83 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index d2d0368..c407f1c 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2020,6 +2020,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, sector_t sector; struct extent_map *em; struct block_device *bdev; + struct btrfs_ordered_extent *ordered; int ret; int nr = 0; size_t page_offset = 0; @@ -2031,7 +2032,15 @@ static int __extent_read_full_page(struct extent_io_tree *tree, set_page_extent_mapped(page); end = page_end; - lock_extent(tree, start, end, GFP_NOFS); + while (1) { + lock_extent(tree, start, end, GFP_NOFS); + ordered = btrfs_lookup_ordered_extent(inode, start); + if (!ordered) + break; + unlock_extent(tree, start, end, GFP_NOFS); + btrfs_start_ordered_extent(inode, ordered, 1); + btrfs_put_ordered_extent(ordered); + } if (page->index == last_byte >> PAGE_CACHE_SHIFT) { char *userpage; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index dace07b..ce35431 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -46,32 +46,42 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes, struct page **prepared_pages, - const char __user *buf) + struct iov_iter *i) { - long page_fault = 0; - int i; + size_t copied; + int pg = 0; int offset = pos & (PAGE_CACHE_SIZE - 1); - for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) { + while (write_bytes > 0) { size_t count = min_t(size_t, PAGE_CACHE_SIZE - offset, write_bytes); - struct page *page = prepared_pages[i]; - fault_in_pages_readable(buf, count); + struct page *page = prepared_pages[pg]; +again: + if (unlikely(iov_iter_fault_in_readable(i, count))) + return -EFAULT; /* Copy data from userspace to the current page */ - kmap(page); - page_fault = __copy_from_user(page_address(page) + offset, - buf, count); + copied = iov_iter_copy_from_user(page, i, offset, count); + /* Flush processor's dcache for this page */ flush_dcache_page(page); - kunmap(page); - buf += count; - write_bytes -= count; + iov_iter_advance(i, copied); + write_bytes -= copied; - if (page_fault) - break; + if (unlikely(copied == 0)) { + count = min_t(size_t, PAGE_CACHE_SIZE - offset, + iov_iter_single_seg_count(i)); + goto again; + } + + if (unlikely(copied < PAGE_CACHE_SIZE - offset)) { + offset += copied; + } else { + pg++; + offset = 0; + } } - return page_fault ? -EFAULT : 0; + return 0; } /* @@ -823,60 +833,24 @@ again: return 0; } -/* Copied from read-write.c */ -static void wait_on_retry_sync_kiocb(struct kiocb *iocb) -{ - set_current_state(TASK_UNINTERRUPTIBLE); - if (!kiocbIsKicked(iocb)) - schedule(); - else - kiocbClearKicked(iocb); - __set_current_state(TASK_RUNNING); -} - -/* - * Just a copy of what do_sync_write does. - */ -static ssize_t __btrfs_direct_write(struct file *file, const char __user *buf, - size_t count, loff_t pos, loff_t *ppos) -{ - struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count }; - unsigned long nr_segs = 1; - struct kiocb kiocb; - ssize_t ret; - - init_sync_kiocb(&kiocb, file); - kiocb.ki_pos = pos; - kiocb.ki_left = count; - kiocb.ki_nbytes = count; - - while (1) { - ret = generic_file_direct_write(&kiocb, &iov, &nr_segs, pos, - ppos, count, count); - if (ret != -EIOCBRETRY) - break; - wait_on_retry_sync_kiocb(&kiocb); - } - - if (ret == -EIOCBQUEUED) - ret = wait_on_sync_kiocb(&kiocb); - *ppos = kiocb.ki_pos; - return ret; -} - -static ssize_t btrfs_file_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) +static ssize_t btrfs_file_aio_write(struct kiocb *iocb, + const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { - loff_t pos; + struct file *file = iocb->ki_filp; + struct inode *inode = fdentry(file)->d_inode; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct page *pinned[2]; + struct page **pages = NULL; + struct iov_iter i; + loff_t *ppos = &iocb->ki_pos; loff_t start_pos; ssize_t num_written = 0; ssize_t err = 0; + size_t count; + size_t ocount; int ret = 0; - struct inode *inode = fdentry(file)->d_inode; - struct btrfs_root *root = BTRFS_I(inode)->root; - struct page **pages = NULL; int nrptrs; - struct page *pinned[2]; unsigned long first_index; unsigned long last_index; int will_write; @@ -888,7 +862,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, pinned[0] = NULL; pinned[1] = NULL; - pos = *ppos; start_pos = pos; vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); @@ -902,6 +875,11 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, mutex_lock(&inode->i_mutex); + err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); + if (err) + goto out; + count = ocount; + current->backing_dev_info = inode->i_mapping->backing_dev_info; err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); if (err) @@ -918,14 +896,24 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, BTRFS_I(inode)->sequence++; if (unlikely(file->f_flags & O_DIRECT)) { - num_written = __btrfs_direct_write(file, buf, count, pos, - ppos); - pos += num_written; - count -= num_written; + ret = btrfs_check_data_free_space(root, inode, count); + if (ret) + goto out; - /* We've written everything we wanted to, exit */ - if (num_written < 0 || !count) + num_written = generic_file_direct_write(iocb, iov, &nr_segs, + pos, ppos, count, + ocount); + + /* All reservations for DIO are done internally */ + btrfs_free_reserved_data_space(root, inode, count); + + if (num_written < 0) { + ret = num_written; + num_written = 0; + goto out; + } else if (num_written == count) { goto out; + } /* * We are going to do buffered for the rest of the range, so we @@ -933,18 +921,20 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, * done. */ buffered = 1; - buf += num_written; + pos += num_written; } - nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE, - PAGE_CACHE_SIZE / (sizeof(struct page *))); + iov_iter_init(&i, iov, nr_segs, count, num_written); + nrptrs = min((iov_iter_count(&i) + PAGE_CACHE_SIZE - 1) / + PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / + (sizeof(struct page *))); pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); /* generic_write_checks can change our pos */ start_pos = pos; first_index = pos >> PAGE_CACHE_SHIFT; - last_index = (pos + count) >> PAGE_CACHE_SHIFT; + last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT; /* * there are lots of better ways to do this, but this code @@ -961,7 +951,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, unlock_page(pinned[0]); } } - if ((pos + count) & (PAGE_CACHE_SIZE - 1)) { + if ((pos + iov_iter_count(&i)) & (PAGE_CACHE_SIZE - 1)) { pinned[1] = grab_cache_page(inode->i_mapping, last_index); if (!PageUptodate(pinned[1])) { ret = btrfs_readpage(NULL, pinned[1]); @@ -972,10 +962,10 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, } } - while (count > 0) { + while (iov_iter_count(&i) > 0) { size_t offset = pos & (PAGE_CACHE_SIZE - 1); - size_t write_bytes = min(count, nrptrs * - (size_t)PAGE_CACHE_SIZE - + size_t write_bytes = min(iov_iter_count(&i), + nrptrs * (size_t)PAGE_CACHE_SIZE - offset); size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; @@ -997,7 +987,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, } ret = btrfs_copy_from_user(pos, num_pages, - write_bytes, pages, buf); + write_bytes, pages, &i); if (ret) { btrfs_free_reserved_data_space(root, inode, write_bytes); @@ -1026,8 +1016,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, btrfs_throttle(root); } - buf += write_bytes; - count -= write_bytes; pos += write_bytes; num_written += write_bytes; @@ -1222,7 +1210,7 @@ const struct file_operations btrfs_file_operations = { .read = do_sync_read, .aio_read = generic_file_aio_read, .splice_read = generic_file_splice_read, - .write = btrfs_file_write, + .aio_write = btrfs_file_aio_write, .mmap = btrfs_file_mmap, .open = generic_file_open, .release = btrfs_release_file, -- 1.6.6.1 -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html