In subpagesize-blocksize scenario, if i_size occurs in a block which is not
the last block in the page, then the space to be reserved should be calculated
appropriately.

Reviewed-by: Liu Bo <bo.li....@oracle.com>
Signed-off-by: Chandan Rajendra <chan...@linux.vnet.ibm.com>
---
 fs/btrfs/inode.c | 36 +++++++++++++++++++++++++++++++-----
 1 file changed, 31 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 266f216..294c503 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -8661,11 +8661,24 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, 
struct vm_fault *vmf)
        loff_t size;
        int ret;
        int reserved = 0;
+       u64 reserved_space;
        u64 page_start;
        u64 page_end;
+       u64 end;
+
+       reserved_space = PAGE_CACHE_SIZE;
 
        sb_start_pagefault(inode->i_sb);
-       ret  = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
+
+       /*
+        * Reserving delalloc space after obtaining the page lock can lead to
+        * deadlock. For example, if a dirty page is locked by this function
+        * and the call to btrfs_delalloc_reserve_space() ends up triggering
+        * dirty page write out, then the btrfs_writepage() function could
+        * end up waiting indefinitely to get a lock on the page currently
+        * being processed by btrfs_page_mkwrite() function.
+        */
+       ret  = btrfs_delalloc_reserve_space(inode, reserved_space);
        if (!ret) {
                ret = file_update_time(vma->vm_file);
                reserved = 1;
@@ -8686,6 +8699,7 @@ again:
        size = i_size_read(inode);
        page_start = page_offset(page);
        page_end = page_start + PAGE_CACHE_SIZE - 1;
+       end = page_end;
 
        if ((page->mapping != inode->i_mapping) ||
            (page_start >= size)) {
@@ -8701,7 +8715,7 @@ again:
         * we can't set the delalloc bits if there are pending ordered
         * extents.  Drop our locks and wait for them to finish
         */
-       ordered = btrfs_lookup_ordered_extent(inode, page_start);
+       ordered = btrfs_lookup_ordered_range(inode, page_start, page_end);
        if (ordered) {
                unlock_extent_cached(io_tree, page_start, page_end,
                                     &cached_state, GFP_NOFS);
@@ -8711,6 +8725,18 @@ again:
                goto again;
        }
 
+       if (page->index == ((size - 1) >> PAGE_CACHE_SHIFT)) {
+               reserved_space = round_up(size - page_start, root->sectorsize);
+               if (reserved_space < PAGE_CACHE_SIZE) {
+                       end = page_start + reserved_space - 1;
+                       spin_lock(&BTRFS_I(inode)->lock);
+                       BTRFS_I(inode)->outstanding_extents++;
+                       spin_unlock(&BTRFS_I(inode)->lock);
+                       btrfs_delalloc_release_space(inode,
+                                               PAGE_CACHE_SIZE - 
reserved_space);
+               }
+       }
+
        /*
         * XXX - page_mkwrite gets called every time the page is dirtied, even
         * if it was already dirty, so for space accounting reasons we need to
@@ -8718,12 +8744,12 @@ again:
         * is probably a better way to do this, but for now keep consistent with
         * prepare_pages in the normal write path.
         */
-       clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
+       clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, end,
                          EXTENT_DIRTY | EXTENT_DELALLOC |
                          EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
                          0, 0, &cached_state, GFP_NOFS);
 
-       ret = btrfs_set_extent_delalloc(inode, page_start, page_end,
+       ret = btrfs_set_extent_delalloc(inode, page_start, end,
                                        &cached_state);
        if (ret) {
                unlock_extent_cached(io_tree, page_start, page_end,
@@ -8762,7 +8788,7 @@ out_unlock:
        }
        unlock_page(page);
 out:
-       btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
+       btrfs_delalloc_release_space(inode, reserved_space);
 out_noreserve:
        sb_end_pagefault(inode->i_sb);
        return ret;
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to