For partial extents, snapshot-aware defrag does not work as expected,
since
a) we use the wrong logical offset to search for parents, which should be
   disk_bytenr + extent_offset, not just disk_bytenr,
b) 'offset' returned by the backref walking just refers to key.offset, not
   the 'offset' stored in btrfs_extent_data_ref which is
   (key.offset - extent_offset).

The reproducer:
$ mkfs.btrfs sda
$ mount sda /mnt
$ btrfs sub create /mnt/sub
$ for i in `seq 5 -1 1`; do dd if=/dev/zero of=/mnt/sub/foo bs=5k count=1 
seek=$i conv=notrunc oflag=sync; done
$ btrfs sub snap /mnt/sub /mnt/snap1
$ btrfs sub snap /mnt/sub /mnt/snap2
$ sync; btrfs filesystem defrag /mnt/sub/foo;
$ umount /mnt
$ btrfs-debug-tree sda (Here we can check whether the defrag operation is 
snapshot-awared.

This addresses the above two problems.

Signed-off-by: Liu Bo <bo.li....@oracle.com>
---
 fs/btrfs/inode.c |   16 ++++++++++++----
 1 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index c931a4d..ab87862 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2134,16 +2134,23 @@ static noinline int record_one_backref(u64 inum, u64 
offset, u64 root_id,
                if (btrfs_file_extent_disk_bytenr(leaf, extent) != old->bytenr)
                        continue;
 
-               extent_offset = btrfs_file_extent_offset(leaf, extent);
-               if (key.offset - extent_offset != offset)
+               /*
+                * 'offset' refers to the exact key.offset,
+                * NOT the 'offset' field in btrfs_extent_data_ref, ie.
+                * (key.offset - extent_offset).
+                */
+               if (key.offset != offset)
                        continue;
 
+               extent_offset = btrfs_file_extent_offset(leaf, extent);
                num_bytes = btrfs_file_extent_num_bytes(leaf, extent);
+
                if (extent_offset >= old->extent_offset + old->offset +
                    old->len || extent_offset + num_bytes <=
                    old->extent_offset + old->offset)
                        continue;
 
+               ret = 0;
                break;
        }
 
@@ -2155,7 +2162,7 @@ static noinline int record_one_backref(u64 inum, u64 
offset, u64 root_id,
 
        backref->root_id = root_id;
        backref->inum = inum;
-       backref->file_pos = offset + extent_offset;
+       backref->file_pos = offset;
        backref->num_bytes = num_bytes;
        backref->extent_offset = extent_offset;
        backref->generation = btrfs_file_extent_generation(leaf, extent);
@@ -2178,7 +2185,8 @@ static noinline bool record_extent_backrefs(struct 
btrfs_path *path,
        new->path = path;
 
        list_for_each_entry_safe(old, tmp, &new->head, list) {
-               ret = iterate_inodes_from_logical(old->bytenr, fs_info,
+               ret = iterate_inodes_from_logical(old->bytenr +
+                                                 old->extent_offset, fs_info,
                                                  path, record_one_backref,
                                                  old);
                BUG_ON(ret < 0 && ret != -ENOENT);
-- 
1.7.7

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to