[PATCH v3] btrfs: fiemap: Cache and merge fiemap extent before submit it to user

Qu Wenruo Thu, 06 Apr 2017 19:43:56 -0700

[BUG]
Cycle mount btrfs can cause fiemap to return different result.
Like:
 # mount /dev/vdb5 /mnt/btrfs
 # dd if=/dev/zero bs=16K count=4 oflag=dsync of=/mnt/btrfs/file
 # xfs_io -c "fiemap -v" /mnt/btrfs/file
 /mnt/test/file:
 EXT: FILE-OFFSET      BLOCK-RANGE      TOTAL FLAGS
   0: [0..127]:        25088..25215       128   0x1
 # umount /mnt/btrfs
 # mount /dev/vdb5 /mnt/btrfs
 # xfs_io -c "fiemap -v" /mnt/btrfs/file
 /mnt/test/file:
 EXT: FILE-OFFSET      BLOCK-RANGE      TOTAL FLAGS
   0: [0..31]:         25088..25119        32   0x0
   1: [32..63]:        25120..25151        32   0x0
   2: [64..95]:        25152..25183        32   0x0
   3: [96..127]:       25184..25215        32   0x1
But after above fiemap, we get correct merged result if we call fiemap
again.
 # xfs_io -c "fiemap -v" /mnt/btrfs/file
 /mnt/test/file:
 EXT: FILE-OFFSET      BLOCK-RANGE      TOTAL FLAGS
   0: [0..127]:        25088..25215       128   0x1


[REASON]
Btrfs will try to merge extent map when inserting new extent map.

btrfs_fiemap(start=0 len=(u64)-1)
|- extent_fiemap(start=0 len=(u64)-1)
   |- get_extent_skip_holes(start=0 len=64k)
   |  |- btrfs_get_extent_fiemap(start=0 len=64k)
   |     |- btrfs_get_extent(start=0 len=64k)
   |        |  Found on-disk (ino, EXTENT_DATA, 0)
   |        |- add_extent_mapping()
   |        |- Return (em->start=0, len=16k)
   |
   |- fiemap_fill_next_extent(logic=0 phys=X len=16k)
   |
   |- get_extent_skip_holes(start=0 len=64k)
   |  |- btrfs_get_extent_fiemap(start=0 len=64k)
   |     |- btrfs_get_extent(start=16k len=48k)
   |        |  Found on-disk (ino, EXTENT_DATA, 16k)
   |        |- add_extent_mapping()
   |        |  |- try_merge_map()
   |        |     Merge with previous em start=0 len=16k
   |        |     resulting em start=0 len=32k
   |        |- Return (em->start=0, len=32K)    << Merged result
   |- Stripe off the unrelated range (0~16K) of return em
   |- fiemap_fill_next_extent(logic=16K phys=X+16K len=16K)
      ^^^ Causing split fiemap extent.

And since in add_extent_mapping(), em is already merged, in next
fiemap() call, we will get merged result.

[FIX]
Here we introduce a new structure, fiemap_cache, which records previous
fiemap extent.

And will always try to merge current fiemap_cache result before calling
fiemap_fill_next_extent().
Only when we failed to merge current fiemap extent with cached one, we
will call fiemap_fill_next_extent() to submit cached one.

So by this method, we can merge all fiemap extents.

It can also be done in fs/ioctl.c, however the problem is if
fieinfo->fi_extents_max == 0, we have no space to cache previous fiemap
extent.
So I choose to merge it in btrfs.

Signed-off-by: Qu Wenruo <quwen...@cn.fujitsu.com>
---
v2:
  Since fiemap_extent_info has a limit for number of fiemap_extent, it's 
possible
  that fiemap_fill_next_extent() return 1 halfway. Remove the WARN_ON() which 
can
  cause kernel warning if we fiemap is called on large compressed file.
v3:
  Rename finish_fiemap_extent() to check_fiemap_extent(), as in v3 we ensured
  submit_fiemap_extent() to submit fiemap cache, so it just acts as a
  sanity check.
  Remove BTRFS_MAX_EXTENT_SIZE limit in submit_fiemap_extent(), as
  extent map can be larger than BTRFS_MAX_EXTENT_SIZE.
  Don't do backward jump, suggested by David.
  Better sanity check and recoverable fix.

To David:
  What about adding a btrfs_debug_warn(), which will only call WARN_ON(1) if
  BTRFS_CONFIG_DEBUG is specified for recoverable bug?

  And modify ASSERT() to always WARN_ON() and exit error code?
---
 fs/btrfs/extent_io.c | 124 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 122 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 28e8192..c4cb65d 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4353,6 +4353,123 @@ static struct extent_map *get_extent_skip_holes(struct 
inode *inode,
        return NULL;
 }
 
+/*
+ * To cache previous fiemap extent
+ *
+ * Will be used for merging fiemap extent
+ */
+struct fiemap_cache {
+       u64 offset;
+       u64 phys;
+       u64 len;
+       u32 flags;
+       bool cached;
+};
+
+/*
+ * Helper to submit fiemap extent.
+ *
+ * Will try to merge current fiemap extent specified by @offset, @phys,
+ * @len and @flags with cached one.
+ * And only when we fails to merge, cached one will be submitted as
+ * fiemap extent.
+ *
+ * Return value is the same as fiemap_fill_next_extent().
+ */
+static int submit_fiemap_extent(struct fiemap_extent_info *fieinfo,
+                               struct fiemap_cache *cache,
+                               u64 offset, u64 phys, u64 len, u32 flags)
+{
+       int ret = 0;
+
+       if (!cache->cached)
+               goto assign;
+
+       /*
+        * Sanity check, extent_fiemap() should have ensured that new
+        * fiemap extent won't overlap with cahced one.
+        * Not recoverable.
+        *
+        * NOTE: Physical address can overlap, due to compression
+        */
+       if (cache->offset + cache->len > offset) {
+               WARN_ON(1);
+               return -EINVAL;
+       }
+
+       /*
+        * Only merges fiemap extents if
+        * 1) Their logical addresses are continuous
+        *
+        * 2) Their physical addresses are continuous
+        *    So truly compressed (physical size smaller than logical size)
+        *    extents won't get merged with each other
+        *
+        * 3) Share same flags except FIEMAP_EXTENT_LAST
+        *    So regular extent won't get merged with prealloc extent
+        */
+       if (cache->offset + cache->len  == offset &&
+           cache->phys + cache->len == phys  &&
+           (cache->flags & ~FIEMAP_EXTENT_LAST) ==
+                       (flags & ~FIEMAP_EXTENT_LAST)) {
+               cache->len += len;
+               cache->flags |= flags;
+               goto try_submit_last;
+       }
+
+       /* Not mergeable, need to submit cached one */
+       ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
+                                     cache->len, cache->flags);
+       cache->cached = false;
+       if (ret)
+               return ret;
+assign:
+       cache->cached = true;
+       cache->offset = offset;
+       cache->phys = phys;
+       cache->len = len;
+       cache->flags = flags;
+try_submit_last:
+       if (cache->flags & FIEMAP_EXTENT_LAST) {
+               ret = fiemap_fill_next_extent(fieinfo, cache->offset,
+                               cache->phys, cache->len, cache->flags);
+               cache->cached = false;
+       }
+       return ret;
+}
+
+/*
+ * Sanity check for fiemap cache
+ *
+ * All fiemap cache should be submitted by submit_fiemap_extent()
+ * Iteration should be terminated either by last fiemap extent or
+ * fieinfo->fi_extents_max.
+ * So no cached fiemap should exist.
+ */
+static int check_fiemap_cache(struct btrfs_fs_info *fs_info,
+                              struct fiemap_extent_info *fieinfo,
+                              struct fiemap_cache *cache)
+{
+       int ret;
+
+       if (!cache->cached)
+               return 0;
+
+       /* Small and recoverbale problem, only to info developer */
+#ifdef CONFIG_BTRFS_DEBUG
+       WARN_ON(1);
+#endif
+       btrfs_warn(fs_info,
+                  "unhandled fiemap cache detected: offset=%llu phys=%llu 
len=%llu flags=0x%x",
+                  cache->offset, cache->phys, cache->len, cache->flags);
+       ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
+                                     cache->len, cache->flags);
+       cache->cached = false;
+       if (ret > 0)
+               ret = 0;
+       return ret;
+}
+
 int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                __u64 start, __u64 len, get_extent_t *get_extent)
 {
@@ -4370,6 +4487,7 @@ int extent_fiemap(struct inode *inode, struct 
fiemap_extent_info *fieinfo,
        struct extent_state *cached_state = NULL;
        struct btrfs_path *path;
        struct btrfs_root *root = BTRFS_I(inode)->root;
+       struct fiemap_cache cache = { 0 };
        int end = 0;
        u64 em_start = 0;
        u64 em_len = 0;
@@ -4549,8 +4667,8 @@ int extent_fiemap(struct inode *inode, struct 
fiemap_extent_info *fieinfo,
                        flags |= FIEMAP_EXTENT_LAST;
                        end = 1;
                }
-               ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
-                                             em_len, flags);
+               ret = submit_fiemap_extent(fieinfo, &cache, em_start, disko,
+                                          em_len, flags);
                if (ret) {
                        if (ret == 1)
                                ret = 0;
@@ -4558,6 +4676,8 @@ int extent_fiemap(struct inode *inode, struct 
fiemap_extent_info *fieinfo,
                }
        }
 out_free:
+       if (!ret)
+               ret = check_fiemap_cache(root->fs_info, fieinfo, &cache);
        free_extent_map(em);
 out:
        btrfs_free_path(path);
-- 
2.9.3



--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v3] btrfs: fiemap: Cache and merge fiemap extent before submit it to user

Reply via email to