Re: [PATCH 3/3] btrfs: Do super block verification before writing it to disk

2018-04-20 Thread Anand Jain






@@ -3563,6 +3565,12 @@ int write_all_supers(struct btrfs_fs_info *fs_info, int 
max_mirrors)
sb = fs_info->super_for_commit;
dev_item = >dev_item;
  
+	if (btrfs_check_super_valid(fs_info, sb, -1)) {

+   btrfs_err(fs_info,
+   "superblock corruption detected before transaction commitment");
+   return -EUCLEAN;
+   }
+
mutex_lock(_info->fs_devices->device_list_mutex);
head = _info->fs_devices->devices;
max_errors = btrfs_super_num_devices(fs_info->super_copy) - 1;


 With patch 1/3 (incompat feature checks) now this set as a
 whole address my concern that I explained earlier. I am ok with
 this approach.

Thanks, Anand
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/3] btrfs: Do super block verification before writing it to disk

2018-04-19 Thread Qu Wenruo


On 2018年04月19日 18:16, David Sterba wrote:
> Looks good, some minor comments below. I'm wondering how to test that.

Currently I'm using the most stupid way to test it, insert code randomly
modifies super blocks.

> We'd have to inject either the corruption or to provide a way to
> forcibly fail the test. For the latter a debugfs should do, I'll send
> something for comments.

What about a sysfs interface to modify super blocks?
Since we already have sectorsize and nodessize, allow it to be writeable
for CONFIG_BTRFS_DEBUG looks pretty good.

Thanks,
Qu

> 
> On Thu, Apr 19, 2018 at 05:38:16PM +0800, Qu Wenruo wrote:
>> @@ -3563,6 +3565,12 @@ int write_all_supers(struct btrfs_fs_info *fs_info, 
>> int max_mirrors)
>>  sb = fs_info->super_for_commit;
>>  dev_item = >dev_item;
>>  
>> +if (btrfs_check_super_valid(fs_info, sb, -1)) {
> 
> A comment that this is skipping the bytenr check would be good.
> 
>> +btrfs_err(fs_info,
>> +"superblock corruption detected before transaction commitment");
> 
>commit
> 
> 
>> +return -EUCLEAN;
>> +}
>> +
>>  mutex_lock(_info->fs_devices->device_list_mutex);
>>  head = _info->fs_devices->devices;
>>  max_errors = btrfs_super_num_devices(fs_info->super_copy) - 1;
>> @@ -3974,9 +3982,18 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 
>> parent_transid, int level,
>>level, first_key);
>>  }
>>  
>> -static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info)
>> +/*
>> + * Check the validation of btrfs super block.
>> + *
>> + * @sb: super block to check
>> + * @super_mirror:   the super block number to check its bytenr.
>> + *  0 means the primary (1st) sb, 1 and 2 means 2nd and
>> + *  3rd backup sb, while -1 means to skip bytenr check.
>> + */
>> +static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
>> +   struct btrfs_super_block *sb,
>> +   int super_mirror)
>>  {
>> -struct btrfs_super_block *sb = fs_info->super_copy;
>>  u64 nodesize = btrfs_super_nodesize(sb);
>>  u64 sectorsize = btrfs_super_sectorsize(sb);
>>  int ret = 0;
>> @@ -4019,7 +4036,7 @@ static int btrfs_check_super_valid(struct 
>> btrfs_fs_info *fs_info)
>>   * Check sectorsize and nodesize first, other check will need it.
>>   * Check all possible sectorsize(4K, 8K, 16K, 32K, 64K) here.
>>   */
>> -if (!is_power_of_2(sectorsize) || sectorsize < 4096 ||
>> +if (!is_power_of_2(sectorsize) || sectorsize < SZ_4K ||
> 
> No unrelated changes please. There are some remaining raw values, send a
> separate patch if you want to convert them.
> 
>>  sectorsize > BTRFS_MAX_METADATA_BLOCKSIZE) {
>>  btrfs_err(fs_info, "invalid sectorsize %llu", sectorsize);
>>  ret = -EINVAL;
>> @@ -4088,9 +4105,10 @@ static int btrfs_check_super_valid(struct 
>> btrfs_fs_info *fs_info)
>>  ret = -EINVAL;
>>  }
>>  
>> -if (btrfs_super_bytenr(sb) != BTRFS_SUPER_INFO_OFFSET) {
>> -btrfs_err(fs_info, "super offset mismatch %llu != %u",
>> -  btrfs_super_bytenr(sb), BTRFS_SUPER_INFO_OFFSET);
>> +if (super_mirror >= 0 && btrfs_super_bytenr(sb) !=
>> +btrfs_sb_offset(super_mirror)) {
>> +btrfs_err(fs_info, "super offset mismatch %llu != %llu",
>> +btrfs_super_bytenr(sb), btrfs_sb_offset(super_mirror));
>>  ret = -EINVAL;
>>  }
>>  
>> -- 
>> 2.17.0
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
>> the body of a message to majord...@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 



signature.asc
Description: OpenPGP digital signature


Re: [PATCH 3/3] btrfs: Do super block verification before writing it to disk

2018-04-19 Thread David Sterba
Looks good, some minor comments below. I'm wondering how to test that.
We'd have to inject either the corruption or to provide a way to
forcibly fail the test. For the latter a debugfs should do, I'll send
something for comments.

On Thu, Apr 19, 2018 at 05:38:16PM +0800, Qu Wenruo wrote:
> @@ -3563,6 +3565,12 @@ int write_all_supers(struct btrfs_fs_info *fs_info, 
> int max_mirrors)
>   sb = fs_info->super_for_commit;
>   dev_item = >dev_item;
>  
> + if (btrfs_check_super_valid(fs_info, sb, -1)) {

A comment that this is skipping the bytenr check would be good.

> + btrfs_err(fs_info,
> + "superblock corruption detected before transaction commitment");

   commit


> + return -EUCLEAN;
> + }
> +
>   mutex_lock(_info->fs_devices->device_list_mutex);
>   head = _info->fs_devices->devices;
>   max_errors = btrfs_super_num_devices(fs_info->super_copy) - 1;
> @@ -3974,9 +3982,18 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 
> parent_transid, int level,
> level, first_key);
>  }
>  
> -static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info)
> +/*
> + * Check the validation of btrfs super block.
> + *
> + * @sb:  super block to check
> + * @super_mirror:the super block number to check its bytenr.
> + *   0 means the primary (1st) sb, 1 and 2 means 2nd and
> + *   3rd backup sb, while -1 means to skip bytenr check.
> + */
> +static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
> +struct btrfs_super_block *sb,
> +int super_mirror)
>  {
> - struct btrfs_super_block *sb = fs_info->super_copy;
>   u64 nodesize = btrfs_super_nodesize(sb);
>   u64 sectorsize = btrfs_super_sectorsize(sb);
>   int ret = 0;
> @@ -4019,7 +4036,7 @@ static int btrfs_check_super_valid(struct btrfs_fs_info 
> *fs_info)
>* Check sectorsize and nodesize first, other check will need it.
>* Check all possible sectorsize(4K, 8K, 16K, 32K, 64K) here.
>*/
> - if (!is_power_of_2(sectorsize) || sectorsize < 4096 ||
> + if (!is_power_of_2(sectorsize) || sectorsize < SZ_4K ||

No unrelated changes please. There are some remaining raw values, send a
separate patch if you want to convert them.

>   sectorsize > BTRFS_MAX_METADATA_BLOCKSIZE) {
>   btrfs_err(fs_info, "invalid sectorsize %llu", sectorsize);
>   ret = -EINVAL;
> @@ -4088,9 +4105,10 @@ static int btrfs_check_super_valid(struct 
> btrfs_fs_info *fs_info)
>   ret = -EINVAL;
>   }
>  
> - if (btrfs_super_bytenr(sb) != BTRFS_SUPER_INFO_OFFSET) {
> - btrfs_err(fs_info, "super offset mismatch %llu != %u",
> -   btrfs_super_bytenr(sb), BTRFS_SUPER_INFO_OFFSET);
> + if (super_mirror >= 0 && btrfs_super_bytenr(sb) !=
> + btrfs_sb_offset(super_mirror)) {
> + btrfs_err(fs_info, "super offset mismatch %llu != %llu",
> + btrfs_super_bytenr(sb), btrfs_sb_offset(super_mirror));
>   ret = -EINVAL;
>   }
>  
> -- 
> 2.17.0
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/3] btrfs: Do super block verification before writing it to disk

2018-04-19 Thread Qu Wenruo
There are already 2 reports about strangely corrupted super blocks,
where csum still matches but extra garbage gets slipped into super block.

The corruption would looks like:
--
superblock: bytenr=65536, device=/dev/sdc1
-
csum_type   41700 (INVALID)
csum0x3b252d3a [match]
bytenr  65536
flags   0x1
( WRITTEN )
magic   _BHRfS_M [match]
...
incompat_flags  0x5b224169
( MIXED_BACKREF |
  COMPRESS_LZO |
  BIG_METADATA |
  EXTENDED_IREF |
  SKINNY_METADATA |
  unknown flag: 0x5b224000 )
...
--
Or
--
superblock: bytenr=65536, device=/dev/mapper/x
-
csum_type  35355 (INVALID)
csum_size  32
csum   0xf0dbeddd [match]
bytenr 65536
flags  0x1
   ( WRITTEN )
magic  _BHRfS_M [match]
...
incompat_flags 0x176d2169
   ( MIXED_BACKREF |
 COMPRESS_LZO |
 BIG_METADATA |
 EXTENDED_IREF |
 SKINNY_METADATA |
 unknown flag: 0x176d2000 )
--

Obviously, csum_type and incompat_flags get some garbage, but its csum
still matches, which means kernel calculates the csum based on corrupted
super block memory.
And after manually fixing these values, the filesystem is completely
healthy without any problem exposed by btrfs check.

Although the cause is still unknown, at least detect it and prevent further
corruption.

Reported-by: Ken Swenson 
Reported-by: Ben Parsons <9parso...@gmail.com>
Signed-off-by: Qu Wenruo 
---
 fs/btrfs/disk-io.c | 34 ++
 1 file changed, 26 insertions(+), 8 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index b189ec086bc2..fc62f84c3613 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -55,7 +55,9 @@
 static const struct extent_io_ops btree_extent_io_ops;
 static void end_workqueue_fn(struct btrfs_work *work);
 static void free_fs_root(struct btrfs_root *root);
-static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info);
+static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
+  struct btrfs_super_block *sb,
+  int super_mirror);
 static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
 static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
  struct btrfs_fs_info *fs_info);
@@ -2668,7 +2670,7 @@ int open_ctree(struct super_block *sb,
 
memcpy(fs_info->fsid, fs_info->super_copy->fsid, BTRFS_FSID_SIZE);
 
-   ret = btrfs_check_super_valid(fs_info);
+   ret = btrfs_check_super_valid(fs_info, fs_info->super_copy, 0);
if (ret) {
btrfs_err(fs_info, "superblock contains fatal errors");
err = -EINVAL;
@@ -3563,6 +3565,12 @@ int write_all_supers(struct btrfs_fs_info *fs_info, int 
max_mirrors)
sb = fs_info->super_for_commit;
dev_item = >dev_item;
 
+   if (btrfs_check_super_valid(fs_info, sb, -1)) {
+   btrfs_err(fs_info,
+   "superblock corruption detected before transaction commitment");
+   return -EUCLEAN;
+   }
+
mutex_lock(_info->fs_devices->device_list_mutex);
head = _info->fs_devices->devices;
max_errors = btrfs_super_num_devices(fs_info->super_copy) - 1;
@@ -3974,9 +3982,18 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 
parent_transid, int level,
  level, first_key);
 }
 
-static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info)
+/*
+ * Check the validation of btrfs super block.
+ *
+ * @sb:super block to check
+ * @super_mirror:  the super block number to check its bytenr.
+ * 0 means the primary (1st) sb, 1 and 2 means 2nd and
+ * 3rd backup sb, while -1 means to skip bytenr check.
+ */
+static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
+  struct btrfs_super_block *sb,
+  int super_mirror)
 {
-   struct btrfs_super_block *sb = fs_info->super_copy;
u64 nodesize = btrfs_super_nodesize(sb);
u64 sectorsize = btrfs_super_sectorsize(sb);
int ret = 0;
@@ -4019,7 +4036,7 @@ static int btrfs_check_super_valid(struct btrfs_fs_info 
*fs_info)
 * Check sectorsize and nodesize first, other check will need it.
 * Check all possible