[PATCH v2] fstests: btrfs/168 verify device ready after device delete

2018-07-05 Thread Anand Jain
This test case verifies if the device ready return success after the
device delete.

Signed-off-by: Anand Jain 
---
v1->v2: use _run_btrfs_util_prog instead of open coding it.

 tests/btrfs/168 | 68 +
 tests/btrfs/168.out |  2 ++
 tests/btrfs/group   |  1 +
 3 files changed, 71 insertions(+)
 create mode 100755 tests/btrfs/168
 create mode 100644 tests/btrfs/168.out

diff --git a/tests/btrfs/168 b/tests/btrfs/168
new file mode 100755
index ..0d3e99839209
--- /dev/null
+++ b/tests/btrfs/168
@@ -0,0 +1,68 @@
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2018 Oracle. All Rights Reserved.
+#
+# FS QA Test 168
+#
+# Test if btrfs is still reported ready after the device delete
+#
+# This could be fixed by the following kernel commit:
+#  btrfs: fix missing superblock update in the device delete commit transaction
+#
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1   # failure is the default!
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+_cleanup()
+{
+   cd /
+   rm -f $tmp.*
+}
+
+# get standard environment, filters and checks
+. ./common/rc
+. ./common/filter
+
+# remove previous $seqres.full before test
+rm -f $seqres.full
+
+# real QA test starts here
+
+# Modify as appropriate.
+_supported_fs btrfs
+_supported_os Linux
+_require_command "$BTRFS_TUNE_PROG" btrfstune
+_require_scratch_dev_pool 2
+
+_scratch_dev_pool_get 2
+dev_1=$(echo $SCRATCH_DEV_POOL | awk '{print $1}')
+dev_2=$(echo $SCRATCH_DEV_POOL | awk '{print $2}')
+
+# normal delete device and then check for ready
+run_check _scratch_pool_mkfs "-d single -m single"
+_scratch_mount
+_run_btrfs_util_prog device delete $dev_1 $SCRATCH_MNT
+_run_btrfs_util_prog device ready $dev_2
+
+_scratch_unmount
+# delete a seed device and then check for ready
+run_check $BTRFS_TUNE_PROG -S 1 $dev_2
+run_check _mount $dev_2 $SCRATCH_MNT
+_run_btrfs_util_prog device add -f $dev_1 $SCRATCH_MNT
+run_check mount -o rw,remount $dev_1 $SCRATCH_MNT
+_run_btrfs_util_prog device delete $dev_2 $SCRATCH_MNT
+_run_btrfs_util_prog device ready $dev_1
+
+_scratch_unmount
+_scratch_dev_pool_put
+
+echo "Silence is golden"
+
+# success, all done
+status=0
+exit
diff --git a/tests/btrfs/168.out b/tests/btrfs/168.out
new file mode 100644
index ..893a41d859c8
--- /dev/null
+++ b/tests/btrfs/168.out
@@ -0,0 +1,2 @@
+QA output created by 168
+Silence is golden
diff --git a/tests/btrfs/group b/tests/btrfs/group
index 5cff3bd6cc03..7bc3ea457992 100644
--- a/tests/btrfs/group
+++ b/tests/btrfs/group
@@ -170,3 +170,4 @@
 165 auto quick subvol
 166 auto quick qgroup
 167 auto quick replace volume
+168 auto quick volume
-- 
2.15.0

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] fstests: btrfs/168 verify device ready after device delete

2018-07-05 Thread Anand Jain




On 07/06/2018 12:40 PM, Eryu Guan wrote:

On Tue, Jul 03, 2018 at 04:47:53PM +0800, Anand Jain wrote:

This test case verifies if the device ready return success after the
device delete.

Signed-off-by: Anand Jain 


Looks fine to me overall, but I may need some helps from btrfs folks :)


---
  tests/btrfs/168 | 68 +
  tests/btrfs/168.out |  2 ++
  tests/btrfs/group   |  1 +
  3 files changed, 71 insertions(+)
  create mode 100755 tests/btrfs/168
  create mode 100644 tests/btrfs/168.out

diff --git a/tests/btrfs/168 b/tests/btrfs/168
new file mode 100755
index ..cb5b8eb4b5a8
--- /dev/null
+++ b/tests/btrfs/168
@@ -0,0 +1,68 @@
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2018 Oracle. All Rights Reserved.
+#
+# FS QA Test 168
+#
+# Test if btrfs is still reported ready after the device delete
+#
+# This could be fixed by the following kernel commit:
+#  btrfs: fix missing superblock update in the device delete commit transaction
+#
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1   # failure is the default!
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+_cleanup()
+{
+   cd /
+   rm -f $tmp.*
+}
+
+# get standard environment, filters and checks
+. ./common/rc
+. ./common/filter
+
+# remove previous $seqres.full before test
+rm -f $seqres.full
+
+# real QA test starts here
+
+# Modify as appropriate.
+_supported_fs btrfs
+_supported_os Linux
+_require_command "$BTRFS_TUNE_PROG" btrfstune
+_require_scratch_dev_pool 2
+
+_scratch_dev_pool_get 2
+dev_1=$(echo $SCRATCH_DEV_POOL | awk '{print $1}')
+dev_2=$(echo $SCRATCH_DEV_POOL | awk '{print $2}')
+
+# normal delete device and then check for ready
+run_check _scratch_pool_mkfs "-d single -m single"
+_scratch_mount
+run_check $BTRFS_UTIL_PROG device delete $dev_1 $SCRATCH_MNT
+run_check $BTRFS_UTIL_PROG device ready $dev_2


Why not "_run_btrfs_util_prog device delete "


 Err. will fix.


+
+_scratch_unmount
+# delete a seed device and then check for ready
+run_check $BTRFS_TUNE_PROG -S 1 $dev_2
+run_check _mount $dev_2 $SCRATCH_MNT
+_run_btrfs_util_prog device add -f $dev_1 $SCRATCH_MNT


Like this one?



But I still prefer dropping run_check family functions completely, just
using bare command, if the output of these commands are not
deterministic we could simply through them away.


 For now will stick to the run check family. Thanks.

Thanks, Anand


Thanks,
Eryu


+run_check mount -o rw,remount $dev_1 $SCRATCH_MNT
+run_check $BTRFS_UTIL_PROG device delete $dev_2 $SCRATCH_MNT
+run_check $BTRFS_UTIL_PROG device ready $dev_1
+
+_scratch_unmount
+_scratch_dev_pool_put
+
+echo "Silence is golden"
+
+# success, all done
+status=0
+exit
diff --git a/tests/btrfs/168.out b/tests/btrfs/168.out
new file mode 100644
index ..893a41d859c8
--- /dev/null
+++ b/tests/btrfs/168.out
@@ -0,0 +1,2 @@
+QA output created by 168
+Silence is golden
diff --git a/tests/btrfs/group b/tests/btrfs/group
index 5cff3bd6cc03..7bc3ea457992 100644
--- a/tests/btrfs/group
+++ b/tests/btrfs/group
@@ -170,3 +170,4 @@
  165 auto quick subvol
  166 auto quick qgroup
  167 auto quick replace volume
+168 auto quick volume
--
2.15.0

--
To unsubscribe from this list: send the line "unsubscribe fstests" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 2/2] btrfs: Verify every chunk has corresponding block group at mount time

2018-07-05 Thread Qu Wenruo
If a crafted btrfs has missing block group items, it could cause
unexpected behavior and breaks our expectation on 1:1
chunk<->block group mapping.

Although we added block group -> chunk mapping check, we still need
chunk -> block group mapping check.

This patch will do extra check to ensure each chunk has its
corresponding block group.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=199847
Reported-by: Xu Wen 
Signed-off-by: Qu Wenruo 
---
changelog:
v2:
  Update return value, explain the @len parameter for
  lookup_extent_mapping(), and remove the ratelimit, all suggested by
  David.
---
 fs/btrfs/extent-tree.c | 57 +-
 1 file changed, 56 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 63a6b5d36ac1..3578fa5b30ef 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -10030,6 +10030,61 @@ btrfs_create_block_group_cache(struct btrfs_fs_info 
*fs_info,
return cache;
 }
 
+
+/*
+ * Iterate all chunks and verify each of them has corresponding block group
+ */
+static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info)
+{
+   struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
+   struct extent_map *em;
+   struct btrfs_block_group_cache *bg;
+   u64 start = 0;
+   int ret = 0;
+
+   while (1) {
+   read_lock(&map_tree->map_tree.lock);
+   /*
+* lookup_extent_mapping will return the first extent map
+* intersects the range, so set @len to 1 is enough to get
+* the first chunk.
+*/
+   em = lookup_extent_mapping(&map_tree->map_tree, start, 1);
+   read_unlock(&map_tree->map_tree.lock);
+   if (!em)
+   break;
+
+   bg = btrfs_lookup_block_group(fs_info, em->start);
+   if (!bg) {
+   btrfs_err(fs_info,
+   "chunk start=%llu len=%llu doesn't have corresponding block group",
+em->start, em->len);
+   ret = -EUCLEAN;
+   free_extent_map(em);
+   break;
+   }
+   if (bg->key.objectid != em->start ||
+   bg->key.offset != em->len ||
+   (bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) !=
+   (em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
+   btrfs_err(fs_info,
+"chunk start=%llu len=%llu flags=0x%llx doesn't match with block group 
start=%llu len=%llu flags=0x%llx",
+   em->start, em->len,
+   em->map_lookup->type & 
BTRFS_BLOCK_GROUP_TYPE_MASK,
+   bg->key.objectid, bg->key.offset,
+   bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK);
+   ret = -EUCLEAN;
+   free_extent_map(em);
+   btrfs_put_block_group(bg);
+   break;
+   }
+   start = em->start + em->len;
+   free_extent_map(em);
+   btrfs_put_block_group(bg);
+   }
+   return ret;
+}
+
 int btrfs_read_block_groups(struct btrfs_fs_info *info)
 {
struct btrfs_path *path;
@@ -10203,7 +10258,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
 
btrfs_add_raid_kobjects(info);
init_global_block_rsv(info);
-   ret = 0;
+   ret = check_chunk_block_group_mappings(info);
 error:
btrfs_free_path(path);
return ret;
-- 
2.18.0

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 1/2] btrfs: Check each block group has corresponding chunk at mount time

2018-07-05 Thread Qu Wenruo
A crafted btrfs with incorrect chunk<->block group mapping, it could leads
to a lot of unexpected behavior.

Although the crafted image can be catched by block group item checker
added in "[PATCH] btrfs: tree-checker: Verify block_group_item", if one
crafted a valid enough block group item which can pass above check but
still mismatch with existing chunk, it could cause a lot of undefined
behavior.

This patch will add extra block group -> chunk mapping check, to ensure
we have a completely matching (start, len, flags) chunk for each block
group at mount time.

Here we reuse the original find_first_block_group(), which is already
doing basic bg -> chunk check, adding more check on start/len and type
flags.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=199837
Reported-by: Xu Wen 
Signed-off-by: Qu Wenruo 
---
changelog:
v2:
  Reuse existing find_first_block_group() to do the verification,
  pointed out by Gu.
---
 fs/btrfs/extent-tree.c | 29 -
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 3d9fe58c0080..63a6b5d36ac1 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -9717,6 +9717,8 @@ static int find_first_block_group(struct btrfs_fs_info 
*fs_info,
int ret = 0;
struct btrfs_key found_key;
struct extent_buffer *leaf;
+   struct btrfs_block_group_item bg;
+   u64 flags;
int slot;
 
ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
@@ -9751,8 +9753,33 @@ static int find_first_block_group(struct btrfs_fs_info 
*fs_info,
"logical %llu len %llu found bg but no related chunk",
  found_key.objectid, found_key.offset);
ret = -ENOENT;
+   } else if (em->start != found_key.objectid ||
+  em->len != found_key.offset) {
+   btrfs_err(fs_info,
+   "block group %llu len %llu mismatch with chunk %llu len %llu",
+ found_key.objectid, found_key.offset,
+ em->start, em->len);
+   ret = -EUCLEAN;
} else {
-   ret = 0;
+   read_extent_buffer(leaf, &bg,
+   btrfs_item_ptr_offset(leaf, slot),
+   sizeof(bg));
+   flags = btrfs_block_group_flags(&bg) &
+   BTRFS_BLOCK_GROUP_TYPE_MASK;
+
+   if (flags != (em->map_lookup->type &
+ BTRFS_BLOCK_GROUP_TYPE_MASK)) {
+   btrfs_err(fs_info,
+"block group %llu len %llu type flags 0x%llx mismatch with chunk type flags 
0x%llx",
+   found_key.objectid,
+   found_key.offset,
+   flags,
+   (BTRFS_BLOCK_GROUP_TYPE_MASK &
+em->map_lookup->type));
+   ret = -EUCLEAN;
+   } else {
+   ret = 0;
+   }
}
free_extent_map(em);
goto out;
-- 
2.18.0

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH 4/4] btrfs-progs: check/original: Don't overwrite return value when we failed to repair

2018-07-05 Thread Gu, Jinxiang


> -Original Message-
> From: linux-btrfs-ow...@vger.kernel.org 
> [mailto:linux-btrfs-ow...@vger.kernel.org] On Behalf Of Qu Wenruo
> Sent: Thursday, July 05, 2018 3:38 PM
> To: linux-btrfs@vger.kernel.org
> Subject: [PATCH 4/4] btrfs-progs: check/original: Don't overwrite return 
> value when we failed to repair
> 
> In check_inode_recs(), for repair mode we always reset @ret to 0.
> It makes no sense and later we check@ret to determine if the repair is
> successful.
> 
> Fix it by removing the offending overwrite.
> 
> Signed-off-by: Qu Wenruo 
> ---
>  check/main.c | 1 -
>  1 file changed, 1 deletion(-)
> 
> diff --git a/check/main.c b/check/main.c
> index 2b5abb2d025b..08476968aaba 100644
> --- a/check/main.c
> +++ b/check/main.c
> @@ -2764,7 +2764,6 @@ static int check_inode_recs(struct btrfs_root *root,
>   free_inode_rec(rec);
>   continue;
>   }
> - ret = 0;
>   }
> 
>   if (!(repair && ret == 0))
> --

Reviewed-by: Gu Jinxiang 


> 2.18.0
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 



N�Р骒r��yb�X�肚�v�^�)藓{.n�+�伐�{�n谶�)��骅w*jg�报�茛j/�赇z罐���2���ㄨ��&�)摺�a囤���G���h��j:+v���w��佶

Re: [PATCH] fstests: btrfs/168 verify device ready after device delete

2018-07-05 Thread Eryu Guan
On Tue, Jul 03, 2018 at 04:47:53PM +0800, Anand Jain wrote:
> This test case verifies if the device ready return success after the
> device delete.
> 
> Signed-off-by: Anand Jain 

Looks fine to me overall, but I may need some helps from btrfs folks :)

> ---
>  tests/btrfs/168 | 68 
> +
>  tests/btrfs/168.out |  2 ++
>  tests/btrfs/group   |  1 +
>  3 files changed, 71 insertions(+)
>  create mode 100755 tests/btrfs/168
>  create mode 100644 tests/btrfs/168.out
> 
> diff --git a/tests/btrfs/168 b/tests/btrfs/168
> new file mode 100755
> index ..cb5b8eb4b5a8
> --- /dev/null
> +++ b/tests/btrfs/168
> @@ -0,0 +1,68 @@
> +#! /bin/bash
> +# SPDX-License-Identifier: GPL-2.0
> +# Copyright (C) 2018 Oracle. All Rights Reserved.
> +#
> +# FS QA Test 168
> +#
> +# Test if btrfs is still reported ready after the device delete
> +#
> +# This could be fixed by the following kernel commit:
> +#  btrfs: fix missing superblock update in the device delete commit 
> transaction
> +#
> +seq=`basename $0`
> +seqres=$RESULT_DIR/$seq
> +echo "QA output created by $seq"
> +
> +here=`pwd`
> +tmp=/tmp/$$
> +status=1 # failure is the default!
> +trap "_cleanup; exit \$status" 0 1 2 3 15
> +
> +_cleanup()
> +{
> + cd /
> + rm -f $tmp.*
> +}
> +
> +# get standard environment, filters and checks
> +. ./common/rc
> +. ./common/filter
> +
> +# remove previous $seqres.full before test
> +rm -f $seqres.full
> +
> +# real QA test starts here
> +
> +# Modify as appropriate.
> +_supported_fs btrfs
> +_supported_os Linux
> +_require_command "$BTRFS_TUNE_PROG" btrfstune
> +_require_scratch_dev_pool 2
> +
> +_scratch_dev_pool_get 2
> +dev_1=$(echo $SCRATCH_DEV_POOL | awk '{print $1}')
> +dev_2=$(echo $SCRATCH_DEV_POOL | awk '{print $2}')
> +
> +# normal delete device and then check for ready
> +run_check _scratch_pool_mkfs "-d single -m single"
> +_scratch_mount
> +run_check $BTRFS_UTIL_PROG device delete $dev_1 $SCRATCH_MNT
> +run_check $BTRFS_UTIL_PROG device ready $dev_2

Why not "_run_btrfs_util_prog device delete "

> +
> +_scratch_unmount
> +# delete a seed device and then check for ready
> +run_check $BTRFS_TUNE_PROG -S 1 $dev_2
> +run_check _mount $dev_2 $SCRATCH_MNT
> +_run_btrfs_util_prog device add -f $dev_1 $SCRATCH_MNT

Like this one?

But I still prefer dropping run_check family functions completely, just
using bare command, if the output of these commands are not
deterministic we could simply through them away.

Thanks,
Eryu

> +run_check mount -o rw,remount $dev_1 $SCRATCH_MNT
> +run_check $BTRFS_UTIL_PROG device delete $dev_2 $SCRATCH_MNT
> +run_check $BTRFS_UTIL_PROG device ready $dev_1
> +
> +_scratch_unmount
> +_scratch_dev_pool_put
> +
> +echo "Silence is golden"
> +
> +# success, all done
> +status=0
> +exit
> diff --git a/tests/btrfs/168.out b/tests/btrfs/168.out
> new file mode 100644
> index ..893a41d859c8
> --- /dev/null
> +++ b/tests/btrfs/168.out
> @@ -0,0 +1,2 @@
> +QA output created by 168
> +Silence is golden
> diff --git a/tests/btrfs/group b/tests/btrfs/group
> index 5cff3bd6cc03..7bc3ea457992 100644
> --- a/tests/btrfs/group
> +++ b/tests/btrfs/group
> @@ -170,3 +170,4 @@
>  165 auto quick subvol
>  166 auto quick qgroup
>  167 auto quick replace volume
> +168 auto quick volume
> -- 
> 2.15.0
> 
> --
> To unsubscribe from this list: send the line "unsubscribe fstests" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: how to best segment a big block device in resizeable btrfs filesystems?

2018-07-05 Thread Andrei Borzenkov
03.07.2018 10:15, Duncan пишет:
> Andrei Borzenkov posted on Tue, 03 Jul 2018 07:25:14 +0300 as excerpted:
> 
>> 02.07.2018 21:35, Austin S. Hemmelgarn пишет:
>>> them (trimming blocks on BTRFS gets rid of old root trees, so it's a
>>> bit dangerous to do it while writes are happening).
>>
>> Could you please elaborate? Do you mean btrfs can trim data before new
>> writes are actually committed to disk?
> 
> No.
> 
> But normally old roots aren't rewritten for some time simply due to odds 
> (fuller filesystems will of course recycle them sooner), and the btrfs 
> mount option usebackuproot (formerly recovery, until the norecovery mount 
> option that parallels that of other filesystems was added and this option 
> was renamed to avoid confusion) can be used to try an older root if the 
> current root is too damaged to successfully mount.
> > But other than simply by odds not using them again immediately, btrfs has
> no special protection for those old roots, and trim/discard will recover 
> them to hardware-unused as it does any other unused space, tho whether it 
> simply marks them for later processing or actually processes them 
> immediately is up to the individual implementation -- some do it 
> immediately, killing all chances at using the backup root because it's 
> already zeroed out, some don't.
> 

How is it relevant to "while writes are happening"? Will trimming old
tress immediately after writes have stopped be any different? Why?

> In the context of the discard mount option, that can mean there's never 
> any old roots available ever, as they've already been cleaned up by the 
> hardware due to the discard option telling the hardware to do it.
> 
> But even not using that mount option, and simply doing the trims 
> periodically, as done weekly by for instance the systemd fstrim timer and 
> service units, or done manually if you prefer, obviously potentially 
> wipes the old roots at that point.  If the system's effectively idle at 
> the time, not much risk as the current commit is likely to represent a 
> filesystem in full stasis, but if there's lots of writes going on at that 
> moment *AND* the system happens to crash at just the wrong time, before 
> additional commits have recreated at least a bit of root history, again, 
> you'll potentially be left without any old roots for the usebackuproot 
> mount option to try to fall back to, should it actually be necessary.
> 

Sorry? You are just saying that "previous state can be discarded before
new state is committed", just more verbosely.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH 3/4] btrfs-progs: check/original: Avoid infinite loop when failed to repair inode

2018-07-05 Thread Gu, Jinxiang


> -Original Message-
> From: linux-btrfs-ow...@vger.kernel.org 
> [mailto:linux-btrfs-ow...@vger.kernel.org] On Behalf Of Qu Wenruo
> Sent: Thursday, July 05, 2018 3:38 PM
> To: linux-btrfs@vger.kernel.org
> Subject: [PATCH 3/4] btrfs-progs: check/original: Avoid infinite loop when 
> failed to repair inode
> 
> Exposed by fuzz-tests/003-multi-check-unmounted/ on fuzzed image
> bko-161811.raw.xz.
> 
> It's caused by the fact when check_fs_roots() finds tree root is
> modified, it re-search tree root by goto again: tag.
> However again: tag will also reset root objectid to 0.
> If we failed to repair one fs root but still modified tree root, we will
> go into such infinite loop.
> 
> Fix it by record which root we should skip for repair mode.
> 
> Signed-off-by: Qu Wenruo 
> ---
>  check/main.c | 19 +--
>  1 file changed, 17 insertions(+), 2 deletions(-)
> 
> diff --git a/check/main.c b/check/main.c
> index c8c347236543..2b5abb2d025b 100644
> --- a/check/main.c
> +++ b/check/main.c
> @@ -3380,6 +3380,7 @@ static int check_fs_roots(struct btrfs_fs_info *fs_info,
>   struct extent_buffer *leaf, *tree_node;
>   struct btrfs_root *tmp_root;
>   struct btrfs_root *tree_root = fs_info->tree_root;
> + u64 skip_root = 0;
>   int ret;
>   int err = 0;
> 
> @@ -3400,7 +3401,10 @@ static int check_fs_roots(struct btrfs_fs_info 
> *fs_info,
> 
>  again:
>   key.offset = 0;
> - key.objectid = 0;
> + if (skip_root)
> + key.objectid = skip_root + 1;
> + else
> + key.objectid = 0;
>   key.type = BTRFS_ROOT_ITEM_KEY;
>   ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
>   if (ret < 0) {
> @@ -3409,6 +3413,7 @@ again:
>   }
>   tree_node = tree_root->node;
>   while (1) {
> +
>   if (tree_node != tree_root->node) {
>   free_root_recs_tree(root_cache);
>   btrfs_release_path(&path);
> @@ -3445,8 +3450,18 @@ again:
>   btrfs_release_path(&path);
>   goto again;
>   }
> - if (ret)
> + if (ret) {
>   err = 1;
> +
> + /*
> +  * We failed to repair this root but modified 
> tree
> +  * root, after again: tag we will still hit this
> +  * root and fail to repair, must skip this root 
> to
> +  * avoid infinite loop
> +  */
> + if (repair)
> + skip_root = key.objectid;
> + }
>   if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
>   btrfs_free_fs_root(tmp_root);
>   } else if (key.type == BTRFS_ROOT_REF_KEY ||
> --

Reviewed-by: Gu Jinxiang 


> 2.18.0
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 





RE: [PATCH 1/4] btrfs-progs: check: Remove the ability to rebuild root overwritting existing tree blocks

2018-07-05 Thread Gu, Jinxiang


> -Original Message-
> From: Qu Wenruo [mailto:quwenruo.bt...@gmx.com]
> Sent: Thursday, July 05, 2018 5:41 PM
> To: Gu, Jinxiang/顾 金香 ; Qu Wenruo ; 
> linux-btrfs@vger.kernel.org
> Subject: Re: [PATCH 1/4] btrfs-progs: check: Remove the ability to rebuild 
> root overwritting existing tree blocks
> 
> 
> 
> On 2018年07月05日 16:50, Gu, Jinxiang wrote:
> >
> >
> >> -Original Message-
> >> From: linux-btrfs-ow...@vger.kernel.org 
> >> [mailto:linux-btrfs-ow...@vger.kernel.org] On Behalf Of Qu Wenruo
> >> Sent: Thursday, July 05, 2018 3:37 PM
> >> To: linux-btrfs@vger.kernel.org
> >> Subject: [PATCH 1/4] btrfs-progs: check: Remove the ability to rebuild 
> >> root overwritting existing tree blocks
> >>
> >> We have function btrfs_fsck_reinit_root() to reinit csum or extent tree.
> >
> > Nit:
> > or fs/file tree
> 
> Nope, btrfs check is only capable of reinit csum or extent tree.
Sorry, not fs/file tree. But DATA RELOC TREE.
Call stack is reset_balance -> btrfs_fsck_reinit_root, and the root is get by
btrfs_read_fs_root(fs_info, &key), which key is
(BTRFS_DATA_RELOC_TREE_OBJECTID, BTRFS_ROOT_ITEM_KEY , (u64)-1)

> 
> Thanks,
> Qu
> 
> >
> >> However this function allows us to let it overwrite existing tree blocks
> >> using @overwrite parameter.
> >>
> >> Such behavior is pretty dangerous while no caller is using this feature
> >> explicitly.
> >>
> >> So just remove @overwrite parameter and allow btrfs_fsck_reinit_root()
> >> to error out when it fails to allocate tree block.
> >>
> >> Signed-off-by: Qu Wenruo 
> >> ---
> >>  check/main.c | 26 --
> >>  1 file changed, 8 insertions(+), 18 deletions(-)
> >>
> >> diff --git a/check/main.c b/check/main.c
> >> index 8db300abb825..c8c347236543 100644
> >> --- a/check/main.c
> >> +++ b/check/main.c
> >> @@ -8379,7 +8379,7 @@ static int do_check_chunks_and_extents(struct 
> >> btrfs_fs_info *fs_info)
> >>  }
> >>
> >>  static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
> >> - struct btrfs_root *root, int overwrite)
> >> +struct btrfs_root *root)
> >>  {
> >>struct extent_buffer *c;
> >>struct extent_buffer *old = root->node;
> >> @@ -8389,21 +8389,13 @@ static int btrfs_fsck_reinit_root(struct 
> >> btrfs_trans_handle *trans,
> >>
> >>level = 0;
> >>
> >> -  if (overwrite) {
> >> -  c = old;
> >> -  extent_buffer_get(c);
> >> -  goto init;
> >> -  }
> >>c = btrfs_alloc_free_block(trans, root,
> >>   root->fs_info->nodesize,
> >>   root->root_key.objectid,
> >>   &disk_key, level, 0, 0);
> >> -  if (IS_ERR(c)) {
> >> -  c = old;
> >> -  extent_buffer_get(c);
> >> -  overwrite = 1;
> >> -  }
> >> -init:
> >> +  if (IS_ERR(c))
> >> +  return PTR_ERR(c);
> >> +
> >>memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
> >>btrfs_set_header_level(c, level);
> >>btrfs_set_header_bytenr(c, c->start);
> >> @@ -8422,9 +8414,7 @@ init:
> >>/*
> >> * this case can happen in the following case:
> >> *
> >> -   * 1.overwrite previous root.
> >> -   *
> >> -   * 2.reinit reloc data root, this is because we skip pin
> >> +   * reinit reloc data root, this is because we skip pin
> >> * down reloc data tree before which means we can allocate
> >> * same block bytenr here.
> >> */
> >> @@ -8609,7 +8599,7 @@ reinit_data_reloc:
> >>goto out;
> >>}
> >>record_root_in_trans(trans, root);
> >> -  ret = btrfs_fsck_reinit_root(trans, root, 0);
> >> +  ret = btrfs_fsck_reinit_root(trans, root);
> >>if (ret)
> >>goto out;
> >>ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
> >> @@ -8675,7 +8665,7 @@ again:
> >>}
> >>
> >>/* Ok we can allocate now, reinit the extent root */
> >> -  ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
> >> +  ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root);
> >>if (ret) {
> >>fprintf(stderr, "extent root initialization failed\n");
> >>/*
> >> @@ -9764,7 +9754,7 @@ int cmd_check(int argc, char **argv)
> >>
> >>if (init_csum_tree) {
> >>printf("Reinitialize checksum tree\n");
> >> -  ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
> >> +  ret = btrfs_fsck_reinit_root(trans, info->csum_root);
> >>if (ret) {
> >>error("checksum tree initialization failed: %d",
> >>ret);
> >> --
> >
> > Reviewed-by: Gu Jinxiang 
> >
> >
> >> 2.18.0
> >>
> >> --
> >> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> >> the body of a message to majord...@vger.kernel.org
> >> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> >>
> >
> >
> >
> > N嫥叉靣笡y氊b瞂千v豝�)藓{.n�+壏{眓谶�

Re: [PATCH 5/5] btrfs: Verify every chunk has corresponding block group at mount time

2018-07-05 Thread Qu Wenruo



On 2018年07月04日 17:46, Qu Wenruo wrote:
> 
> 
> On 2018年07月04日 15:08, Nikolay Borisov wrote:
>>
>>
>> On  3.07.2018 12:10, Qu Wenruo wrote:
>>> If a crafted btrfs has missing block group items, it could cause
>>> unexpected behavior and breaks our expectation on 1:1
>>> chunk<->block group mapping.
>>>
>>> Although we added block group -> chunk mapping check, we still need
>>> chunk -> block group mapping check.
>>>
>>> This patch will do extra check to ensure each chunk has its
>>> corresponding block group.
>>>
>>> Link: https://bugzilla.kernel.org/show_bug.cgi?id=199847
>>> Reported-by: Xu Wen 
>>> Signed-off-by: Qu Wenruo 
>>> ---
>>>  fs/btrfs/extent-tree.c | 52 +-
>>>  1 file changed, 51 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
>>> index 82b446f014b9..746095034ca2 100644
>>> --- a/fs/btrfs/extent-tree.c
>>> +++ b/fs/btrfs/extent-tree.c
>>> @@ -10038,6 +10038,56 @@ static int check_exist_chunk(struct btrfs_fs_info 
>>> *fs_info, u64 start, u64 len,
>>> return ret;
>>>  }
>>>  
>>> +/*
>>> + * Iterate all chunks and verify each of them has corresponding block group
>>> + */
>>> +static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info)
>>> +{
>>> +   struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
>>> +   struct extent_map *em;
>>> +   struct btrfs_block_group_cache *bg;
>>> +   u64 start = 0;
>>> +   int ret = 0;
>>> +
>>> +   while (1) {
>>> +   read_lock(&map_tree->map_tree.lock);
>>> +   em = lookup_extent_mapping(&map_tree->map_tree, start,
>>> +  (u64)-1 - start);
>> len parameter of lookup_extent_mapping eventually ends up in range_end.
>> Meaning it will just return -1. Why not use just -1 for len. Looking at
>> the rest of the code this seems to be the convention. But then there are
>> several places where 1 is passed as well. Hm, in any case a single
>> number is simpler than an expression.
> 
> I still like to be accurate here, since it's @len, then we should follow
> its naming.
> Although we have range_end() for correct careless caller, it still
> doesn't sound good just passing -1 as @len.
> 
>>
>>> +   read_unlock(&map_tree->map_tree.lock);
>>> +   if (!em)
>>> +   break;
>>> +
>>> +   bg = btrfs_lookup_block_group(fs_info, em->start);
>>> +   if (!bg) {
>>> +   btrfs_err_rl(fs_info,
>>> +   "chunk start=%llu len=%llu doesn't have corresponding block group",
>>> +em->start, em->len);
>>> +   ret = -ENOENT;
>>> +   free_extent_map(em);
>>> +   break;
>>> +   }
>>> +   if (bg->key.objectid != em->start ||
>>> +   bg->key.offset != em->len ||
>>> +   (bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) !=
>>> +   (em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
>>> +   btrfs_err_rl(fs_info,
>>> +"chunk start=%llu len=%llu flags=0x%llx doesn't match with block group 
>>> start=%llu len=%llu flags=0x%llx",
>>> +   em->start, em->len,
>>> +   em->map_lookup->type & 
>>> BTRFS_BLOCK_GROUP_TYPE_MASK,
>>> +   bg->key.objectid, bg->key.offset,
>>> +   bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK);
>>> +   ret = -EUCLEAN;
>>> +   free_extent_map(em);
>>> +   btrfs_put_block_group(bg);
>>> +   break;
>>> +   }
>>> +   start = em->start + em->len;
>>> +   free_extent_map(em);
>>> +   btrfs_put_block_group(bg);
>>> +   }
>>> +   return ret;
>>> +}
>>> +
>>>  int btrfs_read_block_groups(struct btrfs_fs_info *info)
>>>  {
>>> struct btrfs_path *path;
>>> @@ -10227,7 +10277,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info 
>>> *info)
>>>  
>>> btrfs_add_raid_kobjects(info);
>>> init_global_block_rsv(info);
>>> -   ret = 0;
>>> +   ret = check_chunk_block_group_mappings(info);
>>
>> Rather than doing that can we just get the count of chunks. Then if we
>> have as many chunks as BG have been read in and we know the BG -> chunk
>> mapping check has passed we can assume that chunks also map to BG
>> without going through all chunks.
> 
> Nope, just as the checks done in that function, we must ensure not only
> the number of bgs/chunks matches, but *each* chunk must have a block
> group with the same size, length and type flags.

Thanks to Gu's comment, there in find_first_block() we have already done
extra check to ensure every block group we're adding has a corresponding
chunk, thus just doing chunk/bg counting should be able to detect
missing block groups.

I'll try this method to reduce unnecessary block group lookup in next
version.

Thanks,
Qu

> 
> If we have a block group doesn't match its size/length, it's pretty

Re: [PATCH 5/5] btrfs: Verify every chunk has corresponding block group at mount time

2018-07-05 Thread Qu Wenruo


On 2018年07月05日 23:18, David Sterba wrote:
> On Tue, Jul 03, 2018 at 05:10:09PM +0800, Qu Wenruo wrote:
>> If a crafted btrfs has missing block group items, it could cause
>> unexpected behavior and breaks our expectation on 1:1
>> chunk<->block group mapping.
>>
>> Although we added block group -> chunk mapping check, we still need
>> chunk -> block group mapping check.
>>
>> This patch will do extra check to ensure each chunk has its
>> corresponding block group.
>>
>> Link: https://bugzilla.kernel.org/show_bug.cgi?id=199847
>> Reported-by: Xu Wen 
>> Signed-off-by: Qu Wenruo 
>> ---
>>  fs/btrfs/extent-tree.c | 52 +-
>>  1 file changed, 51 insertions(+), 1 deletion(-)
>>
>> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
>> index 82b446f014b9..746095034ca2 100644
>> --- a/fs/btrfs/extent-tree.c
>> +++ b/fs/btrfs/extent-tree.c
>> @@ -10038,6 +10038,56 @@ static int check_exist_chunk(struct btrfs_fs_info 
>> *fs_info, u64 start, u64 len,
>>  return ret;
>>  }
>>  
>> +/*
>> + * Iterate all chunks and verify each of them has corresponding block group
>> + */
>> +static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info)
>> +{
>> +struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
>> +struct extent_map *em;
>> +struct btrfs_block_group_cache *bg;
>> +u64 start = 0;
>> +int ret = 0;
>> +
>> +while (1) {
>> +read_lock(&map_tree->map_tree.lock);
>> +em = lookup_extent_mapping(&map_tree->map_tree, start,
>> +   (u64)-1 - start);
> 
> This needs a comment.

For the @len part?

> 
>> +read_unlock(&map_tree->map_tree.lock);
>> +if (!em)
>> +break;
>> +
>> +bg = btrfs_lookup_block_group(fs_info, em->start);
>> +if (!bg) {
>> +btrfs_err_rl(fs_info,
>> +"chunk start=%llu len=%llu doesn't have corresponding block group",
>> + em->start, em->len);
>> +ret = -ENOENT;
> 
> -EUCLEAN ?

Either works for me.

> 
>> +free_extent_map(em);
>> +break;
>> +}
>> +if (bg->key.objectid != em->start ||
>> +bg->key.offset != em->len ||
>> +(bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) !=
>> +(em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
>> +btrfs_err_rl(fs_info,
> 
> Why is this ratelmited? I'd understand that a fuzzed image will spew a
> lot of these errors but for a normal case, it should be ok to print all
> the messages.

Well, even for fuzzed image it won't trigger twice, the first time it
triggers we will error our, so indeed no need to rate the limit anyway.

Thanks,
Qu

> 
>> +"chunk start=%llu len=%llu flags=0x%llx doesn't match with block group 
>> start=%llu len=%llu flags=0x%llx",
>> +em->start, em->len,
>> +em->map_lookup->type & 
>> BTRFS_BLOCK_GROUP_TYPE_MASK,
>> +bg->key.objectid, bg->key.offset,
>> +bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK);
>> +ret = -EUCLEAN;
>> +free_extent_map(em);
>> +btrfs_put_block_group(bg);
>> +break;
>> +}
>> +start = em->start + em->len;
>> +free_extent_map(em);
>> +btrfs_put_block_group(bg);
>> +}
>> +return ret;
>> +}
>> +
>>  int btrfs_read_block_groups(struct btrfs_fs_info *info)
>>  {
>>  struct btrfs_path *path;
>> @@ -10227,7 +10277,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info 
>> *info)
>>  
>>  btrfs_add_raid_kobjects(info);
>>  init_global_block_rsv(info);
>> -ret = 0;
>> +ret = check_chunk_block_group_mappings(info);
>>  error:
>>  btrfs_free_path(path);
>>  return ret;
>> -- 
>> 2.18.0
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
>> the body of a message to majord...@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html



signature.asc
Description: OpenPGP digital signature


Re: [PATCH 4/5] btrfs: Check each block group has corresponding chunk at mount time

2018-07-05 Thread Qu Wenruo


On 2018年07月04日 13:45, Gu, Jinxiang wrote:
> 
> 
>> -Original Message-
>> From: linux-btrfs-ow...@vger.kernel.org 
>> [mailto:linux-btrfs-ow...@vger.kernel.org] On Behalf Of Qu Wenruo
>> Sent: Tuesday, July 03, 2018 5:10 PM
>> To: linux-btrfs@vger.kernel.org
>> Subject: [PATCH 4/5] btrfs: Check each block group has corresponding chunk 
>> at mount time
>>
>> A crafted btrfs with incorrect chunk<->block group mapping, it could leads
>> to a lot of unexpected behavior.
>>
>> Although the crafted image can be catched by block group item checker
>> added in "[PATCH] btrfs: tree-checker: Verify block_group_item", if one
>> crafted a valid enough block group item which can pass above check but
>> still mismatch with existing chunk, it could cause a lot of undefined
>> behavior.
>>
>> This patch will add extra block group -> chunk mapping check, to ensure
>> we have a completely matching (start, len, flags) chunk for each block
>> group at mount time.
>>
>> Link: https://bugzilla.kernel.org/show_bug.cgi?id=199837
>> Reported-by: Xu Wen 
>> Signed-off-by: Qu Wenruo 
>> ---
>>  fs/btrfs/extent-tree.c | 55 --
>>  1 file changed, 53 insertions(+), 2 deletions(-)
>>
>> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
>> index 3d9fe58c0080..82b446f014b9 100644
>> --- a/fs/btrfs/extent-tree.c
>> +++ b/fs/btrfs/extent-tree.c
>> @@ -10003,6 +10003,41 @@ btrfs_create_block_group_cache(struct btrfs_fs_info 
>> *fs_info,
>>  return cache;
>>  }
>>
>> +static int check_exist_chunk(struct btrfs_fs_info *fs_info, u64 start, u64 
>> len,
>> + u64 flags)
>> +{
>> +struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
>> +struct extent_map *em;
>> +int ret;
>> +
>> +read_lock(&map_tree->map_tree.lock);
>> +em = lookup_extent_mapping(&map_tree->map_tree, start, len);
>> +read_unlock(&map_tree->map_tree.lock);
>> +
>> +if (!em) {
>> +btrfs_err_rl(fs_info,
>> +"block group start=%llu len=%llu doesn't have corresponding chunk",
>> + start, len);
>> +ret = -ENOENT;
>> +goto out;
>> +}
> 
> This check has been done in find_first_block_group which has been called 
> before
> check_exist_chunk be called.

Oh, yes, find_first_block_group() indeed does this check, so there is no
need for check_exsist_chunk().
> 
>> +if (em->start != start || em->len != len ||
>> +(em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK) !=
>> +(flags & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
>> +btrfs_err_rl(fs_info,
>> +"block group start=%llu len=%llu flags=0x%llx doesn't match with chunk 
>> start=%llu len=%llu flags=0x%llx",
>> + start, len , flags & BTRFS_BLOCK_GROUP_TYPE_MASK,
>> + em->start, em->len, em->map_lookup->type &
>> + BTRFS_BLOCK_GROUP_TYPE_MASK);
>> +ret = -EUCLEAN;
>> +goto out;
>> +}
> Should this check also be added to find_first_block_group?

Yep.

Thanks,
Qu

> 
>> +ret = 0;
>> +out:
>> +free_extent_map(em);
>> +return ret;
>> +}
>> +
>>  int btrfs_read_block_groups(struct btrfs_fs_info *info)
>>  {
>>  struct btrfs_path *path;
>> @@ -10036,6 +10071,9 @@ int btrfs_read_block_groups(struct btrfs_fs_info 
>> *info)
>>  need_clear = 1;
>>
>>  while (1) {
>> +struct btrfs_block_group_item bg;
>> +int slot;
>> +
>>  ret = find_first_block_group(info, path, &key);
>>  if (ret > 0)
>>  break;
>> @@ -10043,7 +10081,20 @@ int btrfs_read_block_groups(struct btrfs_fs_info 
>> *info)
>>  goto error;
>>
>>  leaf = path->nodes[0];
>> -btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
>> +slot = path->slots[0];
>> +btrfs_item_key_to_cpu(leaf, &found_key, slot);
>> +
>> +read_extent_buffer(leaf, &bg, btrfs_item_ptr_offset(leaf, slot),
>> +   sizeof(bg));
>> +/*
>> + * Chunk and block group must have 1:1 mapping.
>> + * So there must be a chunk for this block group.
>> + */
>> +ret = check_exist_chunk(info, found_key.objectid,
>> +found_key.offset,
>> +btrfs_block_group_flags(&bg));
>> +if (ret < 0)
>> +goto error;
>>
>>  cache = btrfs_create_block_group_cache(info, found_key.objectid,
>> found_key.offset);
>> @@ -10068,7 +10119,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info 
>> *info)
>>  }
>>
>>  read_extent_buffer(leaf, &cache->item,
>> -   btrfs_item_ptr_offset(leaf, path->slots[0]),
>> +   btrfs_item_ptr_offset(leaf, slo

Re: [PATCH] btrfs-progs: check: Fix wrong error message in case of corrupted extent

2018-07-05 Thread David Sterba
On Tue, Jun 05, 2018 at 02:39:10PM +0300, Nikolay Borisov wrote:
> When btrfs check detects a freespace tree extent which ends beyond the
> blockgroup containing it a misleading error messages is printed. For
> example if we have the following extent in the freespace tree:
> 
> item 5 key (30408704 FREE_SPACE_INFO 1073741824) itemoff 16259 itemsize 8
> free space info extent count 3 flags 0
> item 6 key (30425088 FREE_SPACE_EXTENT 49152) itemoff 16259 itemsize 0
> free space extent
> item 7 key (30507008 FREE_SPACE_EXTENT 65536) itemoff 16259 itemsize 0
> free space extent
> item 8 key (30654464 FREE_SPACE_EXTENT 14524648038063310901) itemoff 
> 16259 itemsize 0
> 
> Clearly the last extent is corrupted so we should print something
> along the lines of:
> 
> free space extent ends at 14524648038063310901, beyond end of block group 
> 30408704-1104150528
> 
> Instead currently this is printed:
> 
> free space extent ends at 30654464, beyond end of block group 
> 30408704-1104150528
> 
> So instead of printing the actual erroneous end, we print the beginning
> of the extent. Fix this by printing the actual corrupted end.
> 
> Signed-off-by: Nikolay Borisov 

Applied, thanks.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] btrfs-progs: print bytenr of tree block in print_tree_block_error()

2018-07-05 Thread David Sterba
On Thu, Jun 14, 2018 at 10:40:39AM +0800, Su Yue wrote:
> For easier debug, let print_tree_block_error() print bytenr of
> tree block.
> 
> Signed-off-by: Su Yue 

Applied, thanks.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] btrfs: remove warnings superseded by refcount_t usage

2018-07-05 Thread David Sterba
On Fri, Jun 29, 2018 at 04:05:40PM +0200, David Sterba wrote:
> On Tue, Jun 26, 2018 at 05:00:46PM +0300, Nikolay Borisov wrote:
> > On 25.06.2018 19:38, David Sterba wrote:
> > > There are several WARN_ON calls that catch incrorrect reference counter
> > > updates, but this is what the refcount_t does already:
> > > 
> > > * refcount_inc from 0 will warn
> > > * refcount_dec_and_test from 0 will warn
> > > 
> > 
> > But these warnings are only going to be triggered in the case of
> > CONFIG_REFCOUNT_FULL otherwise refcount falls back to plain atomic
> > operations.
> 
> Right, I'm for REFCOUNT_FULL on by default obviously, but we need to
> take handle the case where it's not. And there isn't a refcount type
> with the full semantics.

There refcount helpers will be extended to allow the full check
semantics so I'll use them and drop the WARN_ONs once the patch gets
merged.

https://lore.kernel.org/lkml/20180703100102.16615-1-mark.rutl...@arm.com/
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 2/2] btrfs: fix missing superblock update in the device delete commit transaction

2018-07-05 Thread David Sterba
On Tue, Jul 03, 2018 at 05:07:24PM +0800, Anand Jain wrote:
> When a device is deleted, the btrfs_super_block::number_devices is
> reduced by 1, but we do that after the commit transaction, so this
> change did not made it to the disk and waits for the next commit
> transaction whenever it happens.
> 
> This can be easily demonstrated using the following test case where I
> use the btrfs device ready cli to read the disk and report.
> 
>   mkfs.btrfs -fq -dsingle -msingle $dev1 $dev2
>   mount $dev1 /btrfs
>   btrfs dev del $dev2 /btrfs
>   btrfs dev ready $dev1; echo RESULT=$? <-- 1
>Without this patch RESULT returns 1, indicating not ready!
> 
>   Testing with a seed device:
> 
>   mkfs.btrfs -fq $dev1
>   btrfstune -S1 $dev1
>   mount $dev1 /btrfs
>   btrfs dev add -f $dev2 /btrfs
>   umount /btrfs
>   mount $dev2 /btrfs
>   btrfs dev del $dev1 /btrfs
>   btrfs dev ready $dev2; echo RESULT=$? <-- 1
> 
>   Fix this by bringing in the num_devices update with in the
>   current commit transaction.
>   Also align the local variable declarations in the function
>   btrfs_rm_dev_item()
>   Delete a todo comment about transient inconsistent state
> 
> Signed-off-by: Anand Jain 
> ---
> v1->v2:
>  Delete a todo comment
>  Refactor btrfs_rm_dev_item to use trans->fs_info and drop fs_info in
> the argument
>  
>  fs/btrfs/volumes.c | 46 +++---
>  1 file changed, 19 insertions(+), 27 deletions(-)
> 
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index b5a60ab37a1c..1bdfd5e05bb5 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -1822,47 +1822,32 @@ static void update_dev_time(const char *path_name)
>   filp_close(filp, NULL);
>  }
>  
> -static int btrfs_rm_dev_item(struct btrfs_fs_info *fs_info,
> +static int btrfs_rm_dev_item(struct btrfs_trans_handle *trans,
>struct btrfs_device *device)
>  {
> - struct btrfs_root *root = fs_info->chunk_root;
> - int ret;
> + struct btrfs_root *root = trans->fs_info->chunk_root;
>   struct btrfs_path *path;
>   struct btrfs_key key;
> - struct btrfs_trans_handle *trans;
> + int ret;
>  
>   path = btrfs_alloc_path();
>   if (!path)
>   return -ENOMEM;
>  
> - trans = btrfs_start_transaction(root, 0);
> - if (IS_ERR(trans)) {
> - btrfs_free_path(path);
> - return PTR_ERR(trans);
> - }
>   key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
>   key.type = BTRFS_DEV_ITEM_KEY;
>   key.offset = device->devid;
>  
>   ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
> - if (ret) {
> - if (ret > 0)
> - ret = -ENOENT;
> - btrfs_abort_transaction(trans, ret);
> - btrfs_end_transaction(trans);
> + if (ret > 0) {
> + ret = -ENOENT;
>   goto out;
>   }
>  
>   ret = btrfs_del_item(trans, root, path);
> - if (ret) {
> - btrfs_abort_transaction(trans, ret);
> - btrfs_end_transaction(trans);
> - }
>  
>  out:
>   btrfs_free_path(path);
> - if (!ret)
> - ret = btrfs_commit_transaction(trans);
>   return ret;
>  }
>  
> @@ -1946,7 +1931,9 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, 
> const char *device_path,
>   u64 devid)
>  {
>   struct btrfs_device *device;
> + struct btrfs_trans_handle *trans;
>   struct btrfs_fs_devices *cur_devices;
> + struct btrfs_root *root = fs_info->dev_root;
>   struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
>   u64 num_devices;
>   int ret = 0;
> @@ -1994,14 +1981,18 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, 
> const char *device_path,
>   if (ret)
>   goto error_undo;
>  
> - /*
> -  * TODO: the superblock still includes this device in its num_devices
> -  * counter although write_all_supers() is not locked out. This
> -  * could give a filesystem state which requires a degraded mount.
> -  */
> - ret = btrfs_rm_dev_item(fs_info, device);
> - if (ret)
> + trans = btrfs_start_transaction(root, 0);
> + if (IS_ERR(trans)) {
> + ret = PTR_ERR(trans);
>   goto error_undo;
> + }
> +
> + ret = btrfs_rm_dev_item(trans, device);
> + if (ret) {
> + btrfs_abort_transaction(trans, ret);
> + btrfs_end_transaction(trans);
> + goto error_undo;
> + }
>  
>   clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
>   btrfs_scrub_cancel_dev(fs_info, device);
> @@ -2070,6 +2061,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, 
> const char *device_path,
>   free_fs_devices(cur_devices);
>   }
>  
> + ret = btrfs_commit_transaction(trans);

The transaction now spans more operations, eg. scratching the old
superblocks and freeing the block device, but this looks ok. I'd be
slightly worried about the 

Re: [PATCH 0/5] Enhancement for block group/chunk verification

2018-07-05 Thread David Sterba
On Thu, Jul 05, 2018 at 09:36:47AM +0800, Qu Wenruo wrote:
> 
> 
> On 2018年07月04日 21:36, David Sterba wrote:
> > On Tue, Jul 03, 2018 at 05:10:04PM +0800, Qu Wenruo wrote:
> >> Can be fetched from github, which is based on v4.18-rc1 tag:
> >> https://github.com/adam900710/linux/tree/tree_checker_enhance
> >>
> >> Reported by Xu Wen , some crafted btrfs image can
> >> cause unexpected kernel behavior.
> >>
> >> All of them are related to block group and chunk, so this patchset will
> >> enhance block group and chunk verification, so kernel can detect them
> >> and error out gracefully (with user friendly error message showing
> >> what's going wrong)
> >>
> >> Obvious corruption (don't need to cross check with chunk/block group),
> >> will be addressed by enhanced tree-checker.
> >> (Most crafted images will be caught by tree-checker)
> >>
> >> More complex corruption will be addressed mostly at
> >> btrfs_read_block_groups(), doing extra cross reference check for
> >> chunk<->block group mapping.
> >> It may cause extra mount time, but compared to the existing time
> >> consuming block group items search, all added check is done completely
> >> in memory using rb_tree, so it shouldn't add too much overhead.
> >>
> >> Qu Wenruo (5):
> >>   btrfs: tree-checker: Verify block_group_item
> >>   btrfs: tree-checker: Detect invalid empty essential tree
> >>   btrfs: relocation: Only remove reloc rb_trees if reloc control has
> >> been initialized
> >>   btrfs: Check each block group has corresponding chunk at mount time
> >>   btrfs: Verify every chunk has corresponding block group at mount time
> > 
> > Patches 1-3 queued, thanks. 4 and 5 have some comments.
> 
> Did I miss the comments for 4 and 5?

Gu has a question regarding patch 4 a and 5 now from me too.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 5/5] btrfs: Verify every chunk has corresponding block group at mount time

2018-07-05 Thread David Sterba
On Tue, Jul 03, 2018 at 05:10:09PM +0800, Qu Wenruo wrote:
> If a crafted btrfs has missing block group items, it could cause
> unexpected behavior and breaks our expectation on 1:1
> chunk<->block group mapping.
> 
> Although we added block group -> chunk mapping check, we still need
> chunk -> block group mapping check.
> 
> This patch will do extra check to ensure each chunk has its
> corresponding block group.
> 
> Link: https://bugzilla.kernel.org/show_bug.cgi?id=199847
> Reported-by: Xu Wen 
> Signed-off-by: Qu Wenruo 
> ---
>  fs/btrfs/extent-tree.c | 52 +-
>  1 file changed, 51 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
> index 82b446f014b9..746095034ca2 100644
> --- a/fs/btrfs/extent-tree.c
> +++ b/fs/btrfs/extent-tree.c
> @@ -10038,6 +10038,56 @@ static int check_exist_chunk(struct btrfs_fs_info 
> *fs_info, u64 start, u64 len,
>   return ret;
>  }
>  
> +/*
> + * Iterate all chunks and verify each of them has corresponding block group
> + */
> +static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info)
> +{
> + struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
> + struct extent_map *em;
> + struct btrfs_block_group_cache *bg;
> + u64 start = 0;
> + int ret = 0;
> +
> + while (1) {
> + read_lock(&map_tree->map_tree.lock);
> + em = lookup_extent_mapping(&map_tree->map_tree, start,
> +(u64)-1 - start);

This needs a comment.

> + read_unlock(&map_tree->map_tree.lock);
> + if (!em)
> + break;
> +
> + bg = btrfs_lookup_block_group(fs_info, em->start);
> + if (!bg) {
> + btrfs_err_rl(fs_info,
> + "chunk start=%llu len=%llu doesn't have corresponding block group",
> +  em->start, em->len);
> + ret = -ENOENT;

-EUCLEAN ?

> + free_extent_map(em);
> + break;
> + }
> + if (bg->key.objectid != em->start ||
> + bg->key.offset != em->len ||
> + (bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) !=
> + (em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
> + btrfs_err_rl(fs_info,

Why is this ratelmited? I'd understand that a fuzzed image will spew a
lot of these errors but for a normal case, it should be ok to print all
the messages.

> +"chunk start=%llu len=%llu flags=0x%llx doesn't match with block group 
> start=%llu len=%llu flags=0x%llx",
> + em->start, em->len,
> + em->map_lookup->type & 
> BTRFS_BLOCK_GROUP_TYPE_MASK,
> + bg->key.objectid, bg->key.offset,
> + bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK);
> + ret = -EUCLEAN;
> + free_extent_map(em);
> + btrfs_put_block_group(bg);
> + break;
> + }
> + start = em->start + em->len;
> + free_extent_map(em);
> + btrfs_put_block_group(bg);
> + }
> + return ret;
> +}
> +
>  int btrfs_read_block_groups(struct btrfs_fs_info *info)
>  {
>   struct btrfs_path *path;
> @@ -10227,7 +10277,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info 
> *info)
>  
>   btrfs_add_raid_kobjects(info);
>   init_global_block_rsv(info);
> - ret = 0;
> + ret = check_chunk_block_group_mappings(info);
>  error:
>   btrfs_free_path(path);
>   return ret;
> -- 
> 2.18.0
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] btrfs: qgroups: Move transaction management inside btrfs_quota_enable/disable

2018-07-05 Thread David Sterba
On Thu, Jul 05, 2018 at 02:50:48PM +0300, Nikolay Borisov wrote:
> Commit 5d23515be669 ("btrfs: Move qgroup rescan on quota enable to
> btrfs_quota_enable") not only resulted in an easier to follow code but
> it also introduced a subtle bug. It changed the timing when the initial
> transaction rescan was happening - before the commit it would happen
> after transaction commit had occured but after the commit it might happen
> before the transaction was committed. This results in failure to
> correctly rescan the quota since there could be data which is still not
> committed on disk.
> 
> This patch aims to fix this by movign the transaction creation/commit
> inside btrfs_quota_enable, which allows to schedule the quota commit
> after the transaction has been committed.
> 
> Fixes: 5d23515be669 ("btrfs: Move qgroup rescan on quota enable to 
> btrfs_quota_enable")
> Reported-by: Misono Tomohiro 
> Link: https://marc.info/?l=linux-btrfs&m=152999289017582
> Signed-off-by: Nikolay Borisov 

Added to misc-next with the fixups (goto and whitespace).
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] btrfs: qgroups: Move transaction management inside btrfs_quota_enable/disable

2018-07-05 Thread David Sterba
On Thu, Jul 05, 2018 at 02:50:48PM +0300, Nikolay Borisov wrote:
> @@ -921,8 +938,10 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
>  
>   ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
> sizeof(*ptr));
> - if (ret)
> + if (ret) {
>   goto out_free_path;
> + btrfs_abort_transaction(trans, ret);

Strange that gcc does not warn about that, and there's no warning switch
to detect dead code.

> @@ -3070,7 +3128,7 @@ static int __btrfs_qgroup_release_data(struct inode 
> *inode,
>   if (free && reserved)
>   return qgroup_free_reserved_data(inode, reserved, start, len);
>   extent_changeset_init(&changeset);
> - ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, start, 
> + ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, start,

So the whitespace hunk still there?

>   start + len -1, EXTENT_QGROUP_RESERVED, &changeset);
>   if (ret < 0)
>   goto out;
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3] btrfs: Add chunk type check in read a chunk

2018-07-05 Thread David Sterba
On Wed, Jul 04, 2018 at 06:16:39PM +0800, Gu Jinxiang wrote:
> Reported in https://bugzilla.kernel.org/show_bug.cgi?id=199839,
> which has a invalid chunk, not return error opportunlly.
> 
> Add chunk type check in btrfs_check_chunk_valid,
> to make error be returned in advance.
> 
> Reported-by: Xu Wen 
> Signed-off-by: Gu Jinxiang 
> Reviewed-by: Qu Wenruo 
> ---
> changelog:
> v3:
> deal with comment by Nikolay Borisov, change the error message
> more appropriate.
> 
> v2:
> deal with comment by Qu, add precise check for chunk type.

Added to misc-next with some updates, thanks.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3] btrfs: Add chunk type check in read a chunk

2018-07-05 Thread David Sterba
On Wed, Jul 04, 2018 at 03:42:30PM +0300, Nikolay Borisov wrote:
> On  4.07.2018 13:16, Gu Jinxiang wrote:
> > Reported in https://bugzilla.kernel.org/show_bug.cgi?id=199839,
> > which has a invalid chunk, not return error opportunlly.
> > 
> > Add chunk type check in btrfs_check_chunk_valid,
> > to make error be returned in advance.
> > 
> > Reported-by: Xu Wen 
> > Signed-off-by: Gu Jinxiang 
> > Reviewed-by: Qu Wenruo 
> > ---
> > changelog:
> > v3:
> > deal with comment by Nikolay Borisov, change the error message
> > more appropriate.
> > 
> > v2:
> > deal with comment by Qu, add precise check for chunk type.
> > 
> >  fs/btrfs/volumes.c | 29 +
> >  1 file changed, 29 insertions(+)
> > 
> > diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> > index b33bf29130b6..deea169cc276 100644
> > --- a/fs/btrfs/volumes.c
> > +++ b/fs/btrfs/volumes.c
> > @@ -6401,6 +6401,8 @@ static int btrfs_check_chunk_valid(struct 
> > btrfs_fs_info *fs_info,
> > u16 num_stripes;
> > u16 sub_stripes;
> > u64 type;
> > +   u64 features;
> > +   int mixed = 0;
> >  
> > length = btrfs_chunk_length(leaf, chunk);
> > stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
> > @@ -6439,6 +6441,33 @@ static int btrfs_check_chunk_valid(struct 
> > btrfs_fs_info *fs_info,
> >   btrfs_chunk_type(leaf, chunk));
> > return -EIO;
> > }
> > +
> > +   if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
> > +   btrfs_err(fs_info, "missing chunk type flag: %llu", type);
> > +   return -EIO;
> > +   }
> > +
> > +   if ((type & BTRFS_BLOCK_GROUP_SYSTEM) &&
> > +   (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA))) {
> > +   btrfs_err(fs_info,
> > +"system chunk type, meanwhile with metadata or data chunk type flag: %llu",
> > +   type);
> 
> That's rather cumbersomely worded, why not simply "Invalid chunk type
> detected"

Updated.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Btrfs: make sure there is always room for generation number

2018-07-05 Thread Zhihui Zhang
code inspection. If I understand it correctly, we are off by 4 bytes
because CRC is 32 bits and generation is 64 bits. But this bug might
never hit in practice.

thanks,

-Zhihui

On Wed, Jul 4, 2018 at 11:21 AM, David Sterba  wrote:
> On Mon, Jul 02, 2018 at 08:00:54PM -0400, Zhihui Zhang wrote:
>> io_ctl_set_generation() assumes that the generation number shares
>> the same page with inline CRCs. Let's make sure this is always true.
>>
>> Signed-off-by: Zhihui Zhang 
>
> Reviewed-by: David Sterba 
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] btrfs: qgroups: Move transaction management inside btrfs_quota_enable/disable

2018-07-05 Thread Nikolay Borisov



On  5.07.2018 14:50, Nikolay Borisov wrote:
> Commit 5d23515be669 ("btrfs: Move qgroup rescan on quota enable to
> btrfs_quota_enable") not only resulted in an easier to follow code but
> it also introduced a subtle bug. It changed the timing when the initial
> transaction rescan was happening - before the commit it would happen
> after transaction commit had occured but after the commit it might happen
> before the transaction was committed. This results in failure to
> correctly rescan the quota since there could be data which is still not
> committed on disk.
> 
> This patch aims to fix this by movign the transaction creation/commit
> inside btrfs_quota_enable, which allows to schedule the quota commit
> after the transaction has been committed.
> 
> Fixes: 5d23515be669 ("btrfs: Move qgroup rescan on quota enable to 
> btrfs_quota_enable")
> Reported-by: Misono Tomohiro 
> Link: https://marc.info/?l=linux-btrfs&m=152999289017582
> Signed-off-by: Nikolay Borisov 
> ---
> 
> Hello, 
> 
> One open-ended question this patch puts is how many items should be reserved
> to guarantee successful qgroup disable. Before this patch quota disable was 
> always using the arbitrary reservation of 2 items. Since this patch makes the 
> the transaction for enable/disable independent we have a chance to actually 
> set appropriate values for both transactions. In any case this patch doesn't 
> make things worse than before. 
> 
>  fs/btrfs/ioctl.c  | 15 ++
>  fs/btrfs/qgroup.c | 86 
> ++-
>  fs/btrfs/qgroup.h |  6 ++--
>  3 files changed, 76 insertions(+), 31 deletions(-)
>

Changes since v2:

* Actually implemented proper transaction unwiding in both functions.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2] btrfs: qgroups: Move transaction management inside btrfs_quota_enable/disable

2018-07-05 Thread Nikolay Borisov
Commit 5d23515be669 ("btrfs: Move qgroup rescan on quota enable to
btrfs_quota_enable") not only resulted in an easier to follow code but
it also introduced a subtle bug. It changed the timing when the initial
transaction rescan was happening - before the commit it would happen
after transaction commit had occured but after the commit it might happen
before the transaction was committed. This results in failure to
correctly rescan the quota since there could be data which is still not
committed on disk.

This patch aims to fix this by movign the transaction creation/commit
inside btrfs_quota_enable, which allows to schedule the quota commit
after the transaction has been committed.

Fixes: 5d23515be669 ("btrfs: Move qgroup rescan on quota enable to 
btrfs_quota_enable")
Reported-by: Misono Tomohiro 
Link: https://marc.info/?l=linux-btrfs&m=152999289017582
Signed-off-by: Nikolay Borisov 
---

Hello, 

One open-ended question this patch puts is how many items should be reserved
to guarantee successful qgroup disable. Before this patch quota disable was 
always using the arbitrary reservation of 2 items. Since this patch makes the 
the transaction for enable/disable independent we have a chance to actually 
set appropriate values for both transactions. In any case this patch doesn't 
make things worse than before. 

 fs/btrfs/ioctl.c  | 15 ++
 fs/btrfs/qgroup.c | 86 ++-
 fs/btrfs/qgroup.h |  6 ++--
 3 files changed, 76 insertions(+), 31 deletions(-)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index cc03d1a04988..7b308184af57 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -5135,9 +5135,7 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void 
__user *arg)
struct inode *inode = file_inode(file);
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_ioctl_quota_ctl_args *sa;
-   struct btrfs_trans_handle *trans = NULL;
int ret;
-   int err;
 
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -5153,28 +5151,19 @@ static long btrfs_ioctl_quota_ctl(struct file *file, 
void __user *arg)
}
 
down_write(&fs_info->subvol_sem);
-   trans = btrfs_start_transaction(fs_info->tree_root, 2);
-   if (IS_ERR(trans)) {
-   ret = PTR_ERR(trans);
-   goto out;
-   }
 
switch (sa->cmd) {
case BTRFS_QUOTA_CTL_ENABLE:
-   ret = btrfs_quota_enable(trans, fs_info);
+   ret = btrfs_quota_enable(fs_info);
break;
case BTRFS_QUOTA_CTL_DISABLE:
-   ret = btrfs_quota_disable(trans, fs_info);
+   ret = btrfs_quota_disable(fs_info);
break;
default:
ret = -EINVAL;
break;
}
 
-   err = btrfs_commit_transaction(trans);
-   if (err && !ret)
-   ret = err;
-out:
kfree(sa);
up_write(&fs_info->subvol_sem);
 drop_write:
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index c25dc47210a3..67e4c7a1cc2b 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -875,8 +875,7 @@ static int btrfs_clean_quota_tree(struct btrfs_trans_handle 
*trans,
return ret;
 }
 
-int btrfs_quota_enable(struct btrfs_trans_handle *trans,
-  struct btrfs_fs_info *fs_info)
+int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
 {
struct btrfs_root *quota_root;
struct btrfs_root *tree_root = fs_info->tree_root;
@@ -886,6 +885,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
struct btrfs_key key;
struct btrfs_key found_key;
struct btrfs_qgroup *qgroup = NULL;
+   struct btrfs_trans_handle *trans = NULL;
int ret = 0;
int slot;
 
@@ -893,9 +893,24 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
if (fs_info->quota_root)
goto out;
 
+   /*
+* 1 for quota root item
+* 1 for BTRFS_QGROUP_STATUS item
+*
+* Yet we also need 2*n items for a QGROUP_INFO/QGROUP_LIMIT items
+* per subvolume. However those are not currently reserved since it
+* would be a lot of overkill.
+*/
+   trans = btrfs_start_transaction(tree_root, 2);
+   if (IS_ERR(trans)) {
+   ret = PTR_ERR(trans);
+   goto out;
+   }
+
fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL);
if (!fs_info->qgroup_ulist) {
ret = -ENOMEM;
+   btrfs_abort_transaction(trans, ret);
goto out;
}
 
@@ -906,12 +921,14 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
   BTRFS_QUOTA_TREE_OBJECTID);
if (IS_ERR(quota_root)) {
ret =  PTR_ERR(quota_root);
+   btrfs_abort_transaction(trans, ret);
goto out;
}
 
path = btrfs_alloc_path();
if (!path

Re: [PATCH 1/4] btrfs-progs: check: Remove the ability to rebuild root overwritting existing tree blocks

2018-07-05 Thread Qu Wenruo


On 2018年07月05日 16:50, Gu, Jinxiang wrote:
> 
> 
>> -Original Message-
>> From: linux-btrfs-ow...@vger.kernel.org 
>> [mailto:linux-btrfs-ow...@vger.kernel.org] On Behalf Of Qu Wenruo
>> Sent: Thursday, July 05, 2018 3:37 PM
>> To: linux-btrfs@vger.kernel.org
>> Subject: [PATCH 1/4] btrfs-progs: check: Remove the ability to rebuild root 
>> overwritting existing tree blocks
>>
>> We have function btrfs_fsck_reinit_root() to reinit csum or extent tree.
> 
> Nit:
> or fs/file tree

Nope, btrfs check is only capable of reinit csum or extent tree.

Thanks,
Qu

> 
>> However this function allows us to let it overwrite existing tree blocks
>> using @overwrite parameter.
>>
>> Such behavior is pretty dangerous while no caller is using this feature
>> explicitly.
>>
>> So just remove @overwrite parameter and allow btrfs_fsck_reinit_root()
>> to error out when it fails to allocate tree block.
>>
>> Signed-off-by: Qu Wenruo 
>> ---
>>  check/main.c | 26 --
>>  1 file changed, 8 insertions(+), 18 deletions(-)
>>
>> diff --git a/check/main.c b/check/main.c
>> index 8db300abb825..c8c347236543 100644
>> --- a/check/main.c
>> +++ b/check/main.c
>> @@ -8379,7 +8379,7 @@ static int do_check_chunks_and_extents(struct 
>> btrfs_fs_info *fs_info)
>>  }
>>
>>  static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
>> -   struct btrfs_root *root, int overwrite)
>> +  struct btrfs_root *root)
>>  {
>>  struct extent_buffer *c;
>>  struct extent_buffer *old = root->node;
>> @@ -8389,21 +8389,13 @@ static int btrfs_fsck_reinit_root(struct 
>> btrfs_trans_handle *trans,
>>
>>  level = 0;
>>
>> -if (overwrite) {
>> -c = old;
>> -extent_buffer_get(c);
>> -goto init;
>> -}
>>  c = btrfs_alloc_free_block(trans, root,
>> root->fs_info->nodesize,
>> root->root_key.objectid,
>> &disk_key, level, 0, 0);
>> -if (IS_ERR(c)) {
>> -c = old;
>> -extent_buffer_get(c);
>> -overwrite = 1;
>> -}
>> -init:
>> +if (IS_ERR(c))
>> +return PTR_ERR(c);
>> +
>>  memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
>>  btrfs_set_header_level(c, level);
>>  btrfs_set_header_bytenr(c, c->start);
>> @@ -8422,9 +8414,7 @@ init:
>>  /*
>>   * this case can happen in the following case:
>>   *
>> - * 1.overwrite previous root.
>> - *
>> - * 2.reinit reloc data root, this is because we skip pin
>> + * reinit reloc data root, this is because we skip pin
>>   * down reloc data tree before which means we can allocate
>>   * same block bytenr here.
>>   */
>> @@ -8609,7 +8599,7 @@ reinit_data_reloc:
>>  goto out;
>>  }
>>  record_root_in_trans(trans, root);
>> -ret = btrfs_fsck_reinit_root(trans, root, 0);
>> +ret = btrfs_fsck_reinit_root(trans, root);
>>  if (ret)
>>  goto out;
>>  ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
>> @@ -8675,7 +8665,7 @@ again:
>>  }
>>
>>  /* Ok we can allocate now, reinit the extent root */
>> -ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
>> +ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root);
>>  if (ret) {
>>  fprintf(stderr, "extent root initialization failed\n");
>>  /*
>> @@ -9764,7 +9754,7 @@ int cmd_check(int argc, char **argv)
>>
>>  if (init_csum_tree) {
>>  printf("Reinitialize checksum tree\n");
>> -ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
>> +ret = btrfs_fsck_reinit_root(trans, info->csum_root);
>>  if (ret) {
>>  error("checksum tree initialization failed: %d",
>>  ret);
>> --
> 
> Reviewed-by: Gu Jinxiang 
> 
> 
>> 2.18.0
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
>> the body of a message to majord...@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>
> 
> 
> 
> N嫥叉靣笡y氊b瞂千v豝�)藓{.n�+壏{眓谶�)韰骅w*jg�秹殠娸/侁鋤罐枈�2娹櫒璀�&�)摺玜囤瓽珴閔�鎗:+v墾妛鑶佶
> 



signature.asc
Description: OpenPGP digital signature


RE: [PATCH 2/4] btrfs-progs: transaction: Error out other than panic when committing transaction

2018-07-05 Thread Gu, Jinxiang


> -Original Message-
> From: linux-btrfs-ow...@vger.kernel.org 
> [mailto:linux-btrfs-ow...@vger.kernel.org] On Behalf Of Qu Wenruo
> Sent: Thursday, July 05, 2018 3:37 PM
> To: linux-btrfs@vger.kernel.org
> Subject: [PATCH 2/4] btrfs-progs: transaction: Error out other than panic 
> when committing transaction
> 
> There are cases that btrfs_commit_transaction() itself can fail, mostly
> due to ENOSPC when allocating space.
> 
> Don't panic out in this case.
> 
> Signed-off-by: Qu Wenruo 
> ---
>  transaction.c | 19 +--
>  1 file changed, 13 insertions(+), 6 deletions(-)
> 
> diff --git a/transaction.c b/transaction.c
> index 9619265ef6e8..b82d346b52c8 100644
> --- a/transaction.c
> +++ b/transaction.c
> @@ -73,7 +73,8 @@ static int update_cowonly_root(struct btrfs_trans_handle 
> *trans,
>   ret = btrfs_update_root(trans, tree_root,
>   &root->root_key,
>   &root->root_item);
> - BUG_ON(ret);
> + if (ret < 0)
> + return ret;
>   btrfs_write_dirty_block_groups(trans, root);
>   }
>   return 0;
> @@ -101,9 +102,11 @@ int commit_tree_roots(struct btrfs_trans_handle *trans,
>   next = fs_info->dirty_cowonly_roots.next;
>   list_del_init(next);
>   root = list_entry(next, struct btrfs_root, dirty_list);
> - update_cowonly_root(trans, root);
> + ret = update_cowonly_root(trans, root);
>   free_extent_buffer(root->commit_root);
>   root->commit_root = NULL;
> + if (ret < 0)
> + return ret;
>   }
> 
>   return 0;
> @@ -162,12 +165,15 @@ int btrfs_commit_transaction(struct btrfs_trans_handle 
> *trans,
>   root->root_item.level = btrfs_header_level(root->node);
>   ret = btrfs_update_root(trans, root->fs_info->tree_root,
>   &root->root_key, &root->root_item);
> - BUG_ON(ret);
> + if (ret < 0)
> + goto out;
>  commit_tree:
>   ret = commit_tree_roots(trans, fs_info);
> - BUG_ON(ret);
> + if (ret < 0)
> + goto out;
>   ret = __commit_transaction(trans, root);
> - BUG_ON(ret);
> + if (ret < 0)
> + goto out;
>   write_ctree_super(trans);
>   btrfs_finish_extent_commit(trans, fs_info->extent_root,
>  &fs_info->pinned_extents);
> @@ -176,7 +182,8 @@ commit_tree:
>   root->commit_root = NULL;
>   fs_info->running_transaction = NULL;
>   fs_info->last_trans_committed = transid;
> - return 0;
> +out:
> + return ret;
>  }
> 
>  void btrfs_abort_transaction(struct btrfs_trans_handle *trans, int error)
> --

Reviewed-by: Gu Jinxiang 

> 2.18.0
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 





RE: [PATCH 1/4] btrfs-progs: check: Remove the ability to rebuild root overwritting existing tree blocks

2018-07-05 Thread Gu, Jinxiang


> -Original Message-
> From: linux-btrfs-ow...@vger.kernel.org 
> [mailto:linux-btrfs-ow...@vger.kernel.org] On Behalf Of Qu Wenruo
> Sent: Thursday, July 05, 2018 3:37 PM
> To: linux-btrfs@vger.kernel.org
> Subject: [PATCH 1/4] btrfs-progs: check: Remove the ability to rebuild root 
> overwritting existing tree blocks
> 
> We have function btrfs_fsck_reinit_root() to reinit csum or extent tree.

Nit:
or fs/file tree

> However this function allows us to let it overwrite existing tree blocks
> using @overwrite parameter.
> 
> Such behavior is pretty dangerous while no caller is using this feature
> explicitly.
> 
> So just remove @overwrite parameter and allow btrfs_fsck_reinit_root()
> to error out when it fails to allocate tree block.
> 
> Signed-off-by: Qu Wenruo 
> ---
>  check/main.c | 26 --
>  1 file changed, 8 insertions(+), 18 deletions(-)
> 
> diff --git a/check/main.c b/check/main.c
> index 8db300abb825..c8c347236543 100644
> --- a/check/main.c
> +++ b/check/main.c
> @@ -8379,7 +8379,7 @@ static int do_check_chunks_and_extents(struct 
> btrfs_fs_info *fs_info)
>  }
> 
>  static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
> -struct btrfs_root *root, int overwrite)
> +   struct btrfs_root *root)
>  {
>   struct extent_buffer *c;
>   struct extent_buffer *old = root->node;
> @@ -8389,21 +8389,13 @@ static int btrfs_fsck_reinit_root(struct 
> btrfs_trans_handle *trans,
> 
>   level = 0;
> 
> - if (overwrite) {
> - c = old;
> - extent_buffer_get(c);
> - goto init;
> - }
>   c = btrfs_alloc_free_block(trans, root,
>  root->fs_info->nodesize,
>  root->root_key.objectid,
>  &disk_key, level, 0, 0);
> - if (IS_ERR(c)) {
> - c = old;
> - extent_buffer_get(c);
> - overwrite = 1;
> - }
> -init:
> + if (IS_ERR(c))
> + return PTR_ERR(c);
> +
>   memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
>   btrfs_set_header_level(c, level);
>   btrfs_set_header_bytenr(c, c->start);
> @@ -8422,9 +8414,7 @@ init:
>   /*
>* this case can happen in the following case:
>*
> -  * 1.overwrite previous root.
> -  *
> -  * 2.reinit reloc data root, this is because we skip pin
> +  * reinit reloc data root, this is because we skip pin
>* down reloc data tree before which means we can allocate
>* same block bytenr here.
>*/
> @@ -8609,7 +8599,7 @@ reinit_data_reloc:
>   goto out;
>   }
>   record_root_in_trans(trans, root);
> - ret = btrfs_fsck_reinit_root(trans, root, 0);
> + ret = btrfs_fsck_reinit_root(trans, root);
>   if (ret)
>   goto out;
>   ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
> @@ -8675,7 +8665,7 @@ again:
>   }
> 
>   /* Ok we can allocate now, reinit the extent root */
> - ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
> + ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root);
>   if (ret) {
>   fprintf(stderr, "extent root initialization failed\n");
>   /*
> @@ -9764,7 +9754,7 @@ int cmd_check(int argc, char **argv)
> 
>   if (init_csum_tree) {
>   printf("Reinitialize checksum tree\n");
> - ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
> + ret = btrfs_fsck_reinit_root(trans, info->csum_root);
>   if (ret) {
>   error("checksum tree initialization failed: %d",
>   ret);
> --

Reviewed-by: Gu Jinxiang 


> 2.18.0
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 





Re: [PATCH 1/2] btrfs-progs: fix nanosecs in task_period_start

2018-07-05 Thread Su Yue




On 07/05/2018 03:20 AM, Stéphane Lesimple wrote:

This is a single-line fix on the preexisting task_period_start function.

Signed-off-by: Stéphane Lesimple 


Reviewed-by: Su Yue 


---
  task-utils.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/task-utils.c b/task-utils.c
index 12b0002..284cbb3 100644
--- a/task-utils.c
+++ b/task-utils.c
@@ -102,7 +102,7 @@ int task_period_start(struct task_info *info, unsigned int 
period_ms)
info->periodic.wakeups_missed = 0;
  
  	sec = period_ms / 1000;

-   ns = (period_ms - (sec * 1000)) * 1000;
+   ns = (period_ms - (sec * 1000)) * 1000 * 1000;
itval.it_interval.tv_sec = sec;
itval.it_interval.tv_nsec = ns;
itval.it_value.tv_sec = sec;




--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/3] btrfs-progs: test/fuzz: Add image for BUG_ON() when opening the fs by btrfs check

2018-07-05 Thread Qu Wenruo
Link: https://bugzilla.kernel.org/show_bug.cgi?id=199839
Signed-off-by: Qu Wenruo 
---
 tests/fuzz-tests/images/bko-199839.raw.txt | 198 +
 tests/fuzz-tests/images/bko-199839.raw.xz  | Bin 0 -> 24400 bytes
 2 files changed, 198 insertions(+)
 create mode 100644 tests/fuzz-tests/images/bko-199839.raw.txt
 create mode 100644 tests/fuzz-tests/images/bko-199839.raw.xz

diff --git a/tests/fuzz-tests/images/bko-199839.raw.txt 
b/tests/fuzz-tests/images/bko-199839.raw.txt
new file mode 100644
index ..3e4b273d9ec7
--- /dev/null
+++ b/tests/fuzz-tests/images/bko-199839.raw.txt
@@ -0,0 +1,198 @@
+URL: https://bugzilla.kernel.org/show_bug.cgi?id=199839
+Wen Xu 2018-05-26 04:18:45 UTC
+
+Created attachment 276197 [details]
+The (compressed) crafted image which causes crash
+
+- Overview
+use-after-free in try_merge_free_space() when mounting a crafted btrfs image
+
+- Reproduce (4.17 KASAN build)
+# mkdir mnt
+# mount -t btrfs 8.img mnt
+
+- Kernel Message
+[  449.751861] BTRFS: device fsid 12b338de-a2e9-40fa-a4b0-90e53b7c5773 devid 1 
transid 8 /dev/loop0
+[  449.757216] BTRFS info (device loop0): disk space caching is enabled
+[  449.757221] BTRFS info (device loop0): has skinny extents
+[  449.785096] BTRFS error (device loop0): bad tree block start 0 29396992
+[  449.788629] BTRFS info (device loop0): read error corrected: ino 0 off 
29396992 (dev /dev/loop0 sector 73800)
+[  449.792965] BTRFS error (device loop0): bad fsid on block 29409280
+[  449.795193] BTRFS info (device loop0): read error corrected: ino 0 off 
29409280 (dev /dev/loop0 sector 73824)
+[  449.795401] BTRFS info (device loop0): creating UUID tree
+[  449.883426] 
==
+[  449.886228] BUG: KASAN: use-after-free in try_merge_free_space+0xc0/0x2e0
+[  449.888344] Read of size 8 at addr 8801ed10f030 by task mount/1291
+
+[  449.889947] CPU: 1 PID: 1291 Comm: mount Not tainted 4.17.0-rc5+ #6
+[  449.889951] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
Ubuntu-1.8.2-1ubuntu1 04/01/2014
+[  449.889953] Call Trace:
+[  449.889976]  dump_stack+0x7b/0xb5
+[  449.890274]  print_address_description+0x70/0x290
+[  449.890286]  kasan_report+0x291/0x390
+[  449.890296]  ? try_merge_free_space+0xc0/0x2e0
+[  449.890303]  __asan_load8+0x54/0x90
+[  449.890310]  try_merge_free_space+0xc0/0x2e0
+[  449.890318]  __btrfs_add_free_space+0x96/0x5e0
+[  449.890324]  ? kasan_check_write+0x14/0x20
+[  449.890331]  ? btrfs_get_block_group+0x1e/0x30
+[  449.890337]  ? block_group_cache_tree_search+0xef/0x150
+[  449.890343]  unpin_extent_range+0x376/0x670
+[  449.890350]  ? __exclude_logged_extent+0x160/0x160
+[  449.890358]  btrfs_finish_extent_commit+0x15b/0x490
+[  449.890371]  ? __find_get_block+0x106/0x400
+[  449.890378]  ? btrfs_prepare_extent_commit+0x1a0/0x1a0
+[  449.890384]  ? write_all_supers+0x714/0x1420
+[  449.890394]  btrfs_commit_transaction+0xaf4/0xfa0
+[  449.890402]  ? btrfs_apply_pending_changes+0xa0/0xa0
+[  449.890407]  ? start_transaction+0x153/0x640
+[  449.890414]  btrfs_create_uuid_tree+0x6a/0x170
+[  449.890419]  open_ctree+0x3b26/0x3ce9
+[  449.890429]  ? close_ctree+0x4a0/0x4a0
+[  449.890441]  ? bdi_register_va+0x44/0x50
+[  449.890451]  ? super_setup_bdi_name+0x11b/0x1a0
+[  449.890457]  ? kill_block_super+0x80/0x80
+[  449.890468]  ? snprintf+0x96/0xd0
+[  449.890479]  btrfs_mount_root+0xae6/0xc60
+[  449.890485]  ? btrfs_mount_root+0xae6/0xc60
+[  449.890491]  ? pcpu_block_update_hint_alloc+0x1f5/0x2a0
+[  449.890498]  ? btrfs_decode_error+0x40/0x40
+[  449.890510]  ? find_next_bit+0x57/0x90
+[  449.890517]  ? cpumask_next+0x1a/0x20
+[  449.890522]  ? pcpu_alloc+0x449/0x8c0
+[  449.890528]  ? pcpu_free_area+0x410/0x410
+[  449.890534]  ? memcg_kmem_put_cache+0x1b/0xa0
+[  449.890540]  ? memcpy+0x45/0x50
+[  449.890547]  mount_fs+0x60/0x1a0
+[  449.890553]  ? btrfs_decode_error+0x40/0x40
+[  449.890558]  ? mount_fs+0x60/0x1a0
+[  449.890565]  ? alloc_vfsmnt+0x309/0x360
+[  449.890570]  vfs_kern_mount+0x6b/0x1a0
+[  449.890576]  ? entry_SYSCALL_64_after_hwframe+0x44/0xa9
+[  449.890583]  btrfs_mount+0x209/0xb71
+[  449.890589]  ? pcpu_block_update_hint_alloc+0x1f5/0x2a0
+[  449.890595]  ? btrfs_remount+0x8e0/0x8e0
+[  449.890601]  ? find_next_zero_bit+0x2c/0xa0
+[  449.890608]  ? find_next_bit+0x57/0x90
+[  449.890613]  ? cpumask_next+0x1a/0x20
+[  449.890617]  ? pcpu_alloc+0x449/0x8c0
+[  449.890624]  ? pcpu_free_area+0x410/0x410
+[  449.890629]  ? memcg_kmem_put_cache+0x1b/0xa0
+[  449.890634]  ? memcpy+0x45/0x50
+[  449.890641]  mount_fs+0x60/0x1a0
+[  449.890646]  ? btrfs_remount+0x8e0/0x8e0
+[  449.890652]  ? mount_fs+0x60/0x1a0
+[  449.890656]  ? alloc_vfsmnt+0x309/0x360
+[  449.890662]  vfs_kern_mount+0x6b/0x1a0
+[  449.890668]  do_mount+0x34a/0x18a0
+[  449.890673]  ? lockref_put_or_lock+0xcf/0x160
+[  449.890680]  ? copy_mount_string+0x20/0x20
+[  449.890685]  ? memcg_kmem_put_cache+0x1b/0xa0
+[  449.890691]  ? kasan_chec

[PATCH 1/3] btrfs: fsck/original: Don't panic out when unexpected root item is referring to one extent

2018-07-05 Thread Qu Wenruo
With crafted image, expected root item can refer to certain extent, and
original mode uses BUG_ON() to handle such case.

Fix it by gracefully return error.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=200403
Signed-off-by: Qu Wenruo 
---
 check/main.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/check/main.c b/check/main.c
index 8db300abb825..6f1182106071 100644
--- a/check/main.c
+++ b/check/main.c
@@ -3724,7 +3724,12 @@ static int check_owner_ref(struct btrfs_root *root,
if (btrfs_header_owner(buf) == back->root)
return 0;
}
-   BUG_ON(rec->is_root);
+   /*
+* Some unexpected root item referring to this one, return 1 to
+* indicate owner not found
+*/
+   if (rec->is_root)
+   return 1;
 
/* try to find the block by search corresponding fs tree */
key.objectid = btrfs_header_owner(buf);
-- 
2.18.0

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/3] btrfs-progs: tests/fuzz: Add fuzzed test image for btrfs check BUG_ON

2018-07-05 Thread Qu Wenruo
This fuzzed image will not only cause kernel BUG_ON(), but also btrfs
check BUG_ON() for original mode.

Checking filesystem on /home/adam/btrfs/crafted_images/runtime/0.img
UUID: 3381d111-94a3-4ac7-8f39-611bbbdab7e6
checking extents
check/main.c:3677: check_owner_ref: BUG_ON `rec->is_root` triggered, value 1
btrfs(+0x572c2)[0x562d65da72c2]
btrfs(+0x6098d)[0x562d65db098d]
btrfs(+0x60bb6)[0x562d65db0bb6]
btrfs(+0x6179b)[0x562d65db179b]
btrfs(cmd_check+0x1199)[0x562d65db5589]
btrfs(main+0x88)[0x562d65d62768]
/usr/lib/libc.so.6(__libc_start_main+0xeb)[0x7f4fcbb1b06b]
btrfs(_start+0x2a)[0x562d65d6288a]

Link: https://bugzilla.kernel.org/show_bug.cgi?id=200403
Signed-off-by: Qu Wenruo 
---
 tests/fuzz-tests/images/bko-200403.raw.txt |  93 +
 tests/fuzz-tests/images/bko-200403.raw.xz  | Bin 0 -> 23252 bytes
 2 files changed, 93 insertions(+)
 create mode 100644 tests/fuzz-tests/images/bko-200403.raw.txt
 create mode 100644 tests/fuzz-tests/images/bko-200403.raw.xz

diff --git a/tests/fuzz-tests/images/bko-200403.raw.txt 
b/tests/fuzz-tests/images/bko-200403.raw.txt
new file mode 100644
index ..aae8ea4810bb
--- /dev/null
+++ b/tests/fuzz-tests/images/bko-200403.raw.txt
@@ -0,0 +1,93 @@
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=200403
+Wen Xu 2018-07-04 17:21:58 UTC
+
+Created attachment 277167 [details]
+The (compressed) crafted image which causes crash
+
+- Reproduce
+# mkdir mnt
+# mount -t btrfs 0.img mnt
+# gcc -o poc poc.c
+# ./poc ./mnt
+# umount mnt
+
+- Kernel message
+[  230.611533] BTRFS: device fsid 3381d111-94a3-4ac7-8f39-611bbbdab7e6 devid 1 
transid 8 /dev/loop0
+[  230.632922] BTRFS info (device loop0): disk space caching is enabled
+[  230.632935] BTRFS info (device loop0): has skinny extents
+[  230.647496] BTRFS info (device loop0): creating UUID tree
+[  237.692643] [ cut here ]
+[  237.692654] kernel BUG at fs/btrfs/volumes.c:1625!
+[  237.693822] invalid opcode:  [#1] SMP KASAN PTI
+[  237.694867] CPU: 1 PID: 1387 Comm: umount Not tainted 4.18.0-rc1+ #8
+[  237.696177] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
Ubuntu-1.8.2-1ubuntu1 04/01/2014
+[  237.698177] RIP: 0010:btrfs_remove_chunk+0x37a/0xd60
+[  237.699209] Code: e0 48 39 85 28 ff ff ff 77 20 0f b6 85 27 ff ff ff 4d 89 
6f 80 4c 89 f7 4d 89 67 89 41 88 47 88 e8 0b 01 f7 ff e9 f5 fe ff ff <0f> 0b 0f 
85 5c 08 00 00 4d 8d 66 40 4c 89 f7 e8 42 f9 b6 ff 4c 89
+[  237.703034] RSP: 0018:8801f0b0fad8 EFLAGS: 00010206
+[  237.704122] RAX: 0800 RBX: 8801ef4d7c38 RCX: 

+[  237.705572] RDX: ed003e161f30 RSI: 0e70 RDI: 
8801f2a6ae70
+[  237.707035] RBP: 8801f0b0fc38 R08: 8801f0b0f9e0 R09: 
8801f0b0fa20
+[  237.708485] R10: 0003 R11: ed003e161f7c R12: 
0740
+[  237.709929] R13: 0001 R14: 8801f2bf0a50 R15: 
8801f0b0fc10
+[  237.711391] FS:  7f691b770840() GS:8801f6f0() 
knlGS:
+[  237.713034] CS:  0010 DS:  ES:  CR0: 80050033
+[  237.714206] CR2: 00cb0348 CR3: 0001f26f8000 CR4: 
06e0
+[  237.719741] Call Trace:
+[  237.720274]  ? btrfs_grow_device+0x240/0x240
+[  237.721193]  ? kasan_check_read+0x11/0x20
+[  237.722080]  ? mutex_lock+0x99/0xf0
+[  237.722854]  btrfs_delete_unused_bgs+0x4b6/0x5c0
+[  237.723836]  close_ctree+0x40a/0x460
+[  237.724586]  ? transaction_kthread+0x250/0x250
+[  237.725523]  ? dispose_list+0xa0/0xa0
+[  237.726303]  btrfs_put_super+0x25/0x30
+[  237.727110]  generic_shutdown_super+0xb9/0x1c0
+[  237.728032]  kill_anon_super+0x24/0x40
+[  237.728814]  btrfs_kill_super+0x31/0x220
+[  237.729630]  deactivate_locked_super+0x6f/0xa0
+[  237.730548]  deactivate_super+0x5e/0x80
+[  237.731352]  cleanup_mnt+0x61/0xa0
+[  237.732060]  __cleanup_mnt+0x12/0x20
+[  237.732835]  task_work_run+0xc8/0xf0
+[  237.733605]  exit_to_usermode_loop+0x125/0x130
+[  237.734530]  do_syscall_64+0x138/0x170
+[  237.735331]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
+[  237.736676] RIP: 0033:0x7f691b050487
+[  237.737457] Code: 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 
31 f6 e9 09 00 00 00 66 0f 1f 84 00 00 00 00 00 b8 a6 00 00 00 0f 05 <48> 3d 01 
f0 ff ff 73 01 c3 48 8b 0d e1 c9 2b 00 f7 d8 64 89 01 48
+[  237.741327] RSP: 002b:7ffdf3a06d98 EFLAGS: 0246 ORIG_RAX: 
00a6
+[  237.742889] RAX:  RBX: 00ca7030 RCX: 
7f691b050487
+[  237.744351] RDX: 0001 RSI:  RDI: 
00cae1e0
+[  237.745814] RBP: 00cae1e0 R08:  R09: 
0015
+[  237.747289] R10: 06b2 R11: 0246 R12: 
7f691b55983c
+[  237.748750] R13:  R14: 00ca7210 R15: 
7ffdf3a07020
+[  237.750224] Modules linked in: snd_hda_codec_generic snd_hda_intel 
snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_timer snd mac_hid i2c_piix4 
soundcore ib_

[PATCH 4/4] btrfs-progs: check/original: Don't overwrite return value when we failed to repair

2018-07-05 Thread Qu Wenruo
In check_inode_recs(), for repair mode we always reset @ret to 0.
It makes no sense and later we check@ret to determine if the repair is
successful.

Fix it by removing the offending overwrite.

Signed-off-by: Qu Wenruo 
---
 check/main.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/check/main.c b/check/main.c
index 2b5abb2d025b..08476968aaba 100644
--- a/check/main.c
+++ b/check/main.c
@@ -2764,7 +2764,6 @@ static int check_inode_recs(struct btrfs_root *root,
free_inode_rec(rec);
continue;
}
-   ret = 0;
}
 
if (!(repair && ret == 0))
-- 
2.18.0

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/4] btrfs-progs: transaction: Error out other than panic when committing transaction

2018-07-05 Thread Qu Wenruo
There are cases that btrfs_commit_transaction() itself can fail, mostly
due to ENOSPC when allocating space.

Don't panic out in this case.

Signed-off-by: Qu Wenruo 
---
 transaction.c | 19 +--
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/transaction.c b/transaction.c
index 9619265ef6e8..b82d346b52c8 100644
--- a/transaction.c
+++ b/transaction.c
@@ -73,7 +73,8 @@ static int update_cowonly_root(struct btrfs_trans_handle 
*trans,
ret = btrfs_update_root(trans, tree_root,
&root->root_key,
&root->root_item);
-   BUG_ON(ret);
+   if (ret < 0)
+   return ret;
btrfs_write_dirty_block_groups(trans, root);
}
return 0;
@@ -101,9 +102,11 @@ int commit_tree_roots(struct btrfs_trans_handle *trans,
next = fs_info->dirty_cowonly_roots.next;
list_del_init(next);
root = list_entry(next, struct btrfs_root, dirty_list);
-   update_cowonly_root(trans, root);
+   ret = update_cowonly_root(trans, root);
free_extent_buffer(root->commit_root);
root->commit_root = NULL;
+   if (ret < 0)
+   return ret;
}
 
return 0;
@@ -162,12 +165,15 @@ int btrfs_commit_transaction(struct btrfs_trans_handle 
*trans,
root->root_item.level = btrfs_header_level(root->node);
ret = btrfs_update_root(trans, root->fs_info->tree_root,
&root->root_key, &root->root_item);
-   BUG_ON(ret);
+   if (ret < 0)
+   goto out;
 commit_tree:
ret = commit_tree_roots(trans, fs_info);
-   BUG_ON(ret);
+   if (ret < 0)
+   goto out;
ret = __commit_transaction(trans, root);
-   BUG_ON(ret);
+   if (ret < 0)
+   goto out;
write_ctree_super(trans);
btrfs_finish_extent_commit(trans, fs_info->extent_root,
   &fs_info->pinned_extents);
@@ -176,7 +182,8 @@ commit_tree:
root->commit_root = NULL;
fs_info->running_transaction = NULL;
fs_info->last_trans_committed = transid;
-   return 0;
+out:
+   return ret;
 }
 
 void btrfs_abort_transaction(struct btrfs_trans_handle *trans, int error)
-- 
2.18.0

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/4] Some random fuzz test fixes

2018-07-05 Thread Qu Wenruo
Can be fetched from github:
https://github.com/adam900710/btrfs-progs/tree/fuzzed_enhance

Some random bugs exposed by the ill-fated fuzz-test (003 never seems to
pass).

It will be a big work to make fuzz/003 to pass, but at least there are
some easy fixes.

Qu Wenruo (4):
  btrfs-progs: check: Remove the ability to rebuild root overwritting
existing tree blocks
  btrfs-progs: transaction: Error out other than panic when committing
transaction
  btrfs-progs: check/original: Avoid infinite loop when failed to repair
inode
  btrfs-progs: check/original: Don't overwrite return value when we
failed to repair

 check/main.c  | 46 +-
 transaction.c | 19 +--
 2 files changed, 38 insertions(+), 27 deletions(-)

-- 
2.18.0

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/4] btrfs-progs: check/original: Avoid infinite loop when failed to repair inode

2018-07-05 Thread Qu Wenruo
Exposed by fuzz-tests/003-multi-check-unmounted/ on fuzzed image
bko-161811.raw.xz.

It's caused by the fact when check_fs_roots() finds tree root is
modified, it re-search tree root by goto again: tag.
However again: tag will also reset root objectid to 0.
If we failed to repair one fs root but still modified tree root, we will
go into such infinite loop.

Fix it by record which root we should skip for repair mode.

Signed-off-by: Qu Wenruo 
---
 check/main.c | 19 +--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/check/main.c b/check/main.c
index c8c347236543..2b5abb2d025b 100644
--- a/check/main.c
+++ b/check/main.c
@@ -3380,6 +3380,7 @@ static int check_fs_roots(struct btrfs_fs_info *fs_info,
struct extent_buffer *leaf, *tree_node;
struct btrfs_root *tmp_root;
struct btrfs_root *tree_root = fs_info->tree_root;
+   u64 skip_root = 0;
int ret;
int err = 0;
 
@@ -3400,7 +3401,10 @@ static int check_fs_roots(struct btrfs_fs_info *fs_info,
 
 again:
key.offset = 0;
-   key.objectid = 0;
+   if (skip_root)
+   key.objectid = skip_root + 1;
+   else
+   key.objectid = 0;
key.type = BTRFS_ROOT_ITEM_KEY;
ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
if (ret < 0) {
@@ -3409,6 +3413,7 @@ again:
}
tree_node = tree_root->node;
while (1) {
+
if (tree_node != tree_root->node) {
free_root_recs_tree(root_cache);
btrfs_release_path(&path);
@@ -3445,8 +3450,18 @@ again:
btrfs_release_path(&path);
goto again;
}
-   if (ret)
+   if (ret) {
err = 1;
+
+   /*
+* We failed to repair this root but modified 
tree
+* root, after again: tag we will still hit this
+* root and fail to repair, must skip this root 
to
+* avoid infinite loop
+*/
+   if (repair)
+   skip_root = key.objectid;
+   }
if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
btrfs_free_fs_root(tmp_root);
} else if (key.type == BTRFS_ROOT_REF_KEY ||
-- 
2.18.0

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/4] btrfs-progs: check: Remove the ability to rebuild root overwritting existing tree blocks

2018-07-05 Thread Qu Wenruo
We have function btrfs_fsck_reinit_root() to reinit csum or extent tree.
However this function allows us to let it overwrite existing tree blocks
using @overwrite parameter.

Such behavior is pretty dangerous while no caller is using this feature
explicitly.

So just remove @overwrite parameter and allow btrfs_fsck_reinit_root()
to error out when it fails to allocate tree block.

Signed-off-by: Qu Wenruo 
---
 check/main.c | 26 --
 1 file changed, 8 insertions(+), 18 deletions(-)

diff --git a/check/main.c b/check/main.c
index 8db300abb825..c8c347236543 100644
--- a/check/main.c
+++ b/check/main.c
@@ -8379,7 +8379,7 @@ static int do_check_chunks_and_extents(struct 
btrfs_fs_info *fs_info)
 }
 
 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
-  struct btrfs_root *root, int overwrite)
+ struct btrfs_root *root)
 {
struct extent_buffer *c;
struct extent_buffer *old = root->node;
@@ -8389,21 +8389,13 @@ static int btrfs_fsck_reinit_root(struct 
btrfs_trans_handle *trans,
 
level = 0;
 
-   if (overwrite) {
-   c = old;
-   extent_buffer_get(c);
-   goto init;
-   }
c = btrfs_alloc_free_block(trans, root,
   root->fs_info->nodesize,
   root->root_key.objectid,
   &disk_key, level, 0, 0);
-   if (IS_ERR(c)) {
-   c = old;
-   extent_buffer_get(c);
-   overwrite = 1;
-   }
-init:
+   if (IS_ERR(c))
+   return PTR_ERR(c);
+
memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
btrfs_set_header_level(c, level);
btrfs_set_header_bytenr(c, c->start);
@@ -8422,9 +8414,7 @@ init:
/*
 * this case can happen in the following case:
 *
-* 1.overwrite previous root.
-*
-* 2.reinit reloc data root, this is because we skip pin
+* reinit reloc data root, this is because we skip pin
 * down reloc data tree before which means we can allocate
 * same block bytenr here.
 */
@@ -8609,7 +8599,7 @@ reinit_data_reloc:
goto out;
}
record_root_in_trans(trans, root);
-   ret = btrfs_fsck_reinit_root(trans, root, 0);
+   ret = btrfs_fsck_reinit_root(trans, root);
if (ret)
goto out;
ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
@@ -8675,7 +8665,7 @@ again:
}
 
/* Ok we can allocate now, reinit the extent root */
-   ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
+   ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root);
if (ret) {
fprintf(stderr, "extent root initialization failed\n");
/*
@@ -9764,7 +9754,7 @@ int cmd_check(int argc, char **argv)
 
if (init_csum_tree) {
printf("Reinitialize checksum tree\n");
-   ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
+   ret = btrfs_fsck_reinit_root(trans, info->csum_root);
if (ret) {
error("checksum tree initialization failed: %d",
ret);
-- 
2.18.0

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html