On Fri, Jun 12, 2015 at 11:00:15AM +0800, Qu Wenruo wrote:
> Introduce the new partial csum mechanism for tree block.
> 
> [Old tree block csum]
> 0     4     8    12    16    20    24    28    32
> -------------------------------------------------
> |csum |   unused, all 0                               |
> -------------------------------------------------
> Csum is the crc32 of the whole tree block data.
> 
> [New tree block csum]
> -------------------------------------------------
> |csum0|csum1|csum2|csum3|csum4|csum5|csum6|csum7|
> -------------------------------------------------
> Where csum0 is the same as the old one, crc32 of the whole tree block
> data.
> 
> But csum1~csum7 will restore crc32 of each eighth part.
> Take example of 16K leafsize, then:
> csum1: crc32 of BTRFS_CSUM_SIZE~4K
> csum2: crc32 of 4K~6K
> ...
> csum7: crc32 of 14K~16K
> 
> This provides the ability for btrfs not only to detect corruption but
> also to know where corruption is.
> Further improve the robustness of btrfs.

What of the use to know where corruption locates?  It still turns to
find a good copy of this block (if there is) and recovery from it,
doesn't it?

Can you eleborate more on how to improve the robustness?

> 
> Although the best practise is to introduce new csum type and put every
> eighth crc32 into corresponding place, but the benefit is not worthy to
> break the backward compatibility.
> So keep csum0 and modify csum1 range to keep backward compatibility.
> 
> Signed-off-by: Qu Wenruo <quwen...@cn.fujitsu.com>
> ---
> TODO:
>   Add scrub support to use partial csum to rebuild metadata.
>   This is a little hard as current scrub don't treat tree block as a whole.
>   Needs a lot of other improvement for scrub.
> ---
>  fs/btrfs/disk-io.c | 75 
> ++++++++++++++++++++++++++++++++++++------------------
>  1 file changed, 50 insertions(+), 25 deletions(-)
> 
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index 7f83778..54052d3 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -271,47 +271,76 @@ void btrfs_csum_final(u32 crc, char *result)
>  }
>  
>  /*
> - * compute the csum for a btree block, and either verify it or write it
> - * into the csum field of the block.
> + * Calcuate partial crc32 for each part.
> + *
> + * Part should be in [0, 7].
> + * Part 0 is the old crc32 of the whole leaf/node.
> + * Part 1 is the crc32 of 32~ 2/8 of leaf/node.
> + * Part 2 is the crc32 of 3/8 of leaf/node.
> + * Part 3 is the crc32 of 4/8 of lean/node and so on.

typo here.

Thanks,

-liubo
>   */
> -static int csum_tree_block(struct btrfs_fs_info *fs_info,
> -                        struct extent_buffer *buf,
> -                        int verify)
> +static int csum_tree_block_part(struct extent_buffer *buf,
> +                             char *result, int part)
>  {
> -     u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
> -     char *result = NULL;
> +     int offset;
> +     int err;
>       unsigned long len;
>       unsigned long cur_len;
> -     unsigned long offset = BTRFS_CSUM_SIZE;
> -     char *kaddr;
>       unsigned long map_start;
>       unsigned long map_len;
> -     int err;
> +     char *kaddr;
>       u32 crc = ~(u32)0;
> -     unsigned long inline_result;
>  
> -     len = buf->len - offset;
> +     BUG_ON(part >= 8 || part < 0);
> +     BUG_ON(ALIGN(buf->len, 8) != buf->len);
> +
> +     if (part == 0) {
> +             offset = BTRFS_CSUM_SIZE;
> +             len = buf->len - offset;
> +     } else if (part == 0) {
> +             offset = BTRFS_CSUM_SIZE;
> +             len = buf->len * 2 / 8 - offset;
> +     } else {
> +             offset = part * buf->len / 8;
> +             len = buf->len / 8;
> +     }
> +
>       while (len > 0) {
>               err = map_private_extent_buffer(buf, offset, 32,
>                                       &kaddr, &map_start, &map_len);
>               if (err)
> -                     return 1;
> +                     return err;
>               cur_len = min(len, map_len - (offset - map_start));
>               crc = btrfs_csum_data(kaddr + offset - map_start,
>                                     crc, cur_len);
>               len -= cur_len;
>               offset += cur_len;
>       }
> -     if (csum_size > sizeof(inline_result)) {
> -             result = kzalloc(csum_size, GFP_NOFS);
> -             if (!result)
> +     btrfs_csum_final(crc, result + BTRFS_CSUM_SIZE * part/ 8);
> +     return 0;
> +}
> +
> +/*
> + * compute the csum for a btree block, and either verify it or write it
> + * into the csum field of the block.
> + */
> +static int csum_tree_block(struct btrfs_fs_info *fs_info,
> +                        struct extent_buffer *buf,
> +                        int verify)
> +{
> +     u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
> +     char result[BTRFS_CSUM_SIZE] = {0};
> +     int err;
> +     u32 crc = ~(u32)0;
> +     int index = 0;
> +
> +     /* get every part csum */
> +     for (index = 0; index < 8; index++) {
> +             err = csum_tree_block_part(buf, result, index);
> +             if (err)
>                       return 1;
> -     } else {
> -             result = (char *)&inline_result;
>       }
>  
> -     btrfs_csum_final(crc, result);
> -
>       if (verify) {
>               if (memcmp_extent_buffer(buf, result, 0, csum_size)) {
>                       u32 val;
> @@ -324,15 +353,11 @@ static int csum_tree_block(struct btrfs_fs_info 
> *fs_info,
>                               "level %d\n",
>                               fs_info->sb->s_id, buf->start,
>                               val, found, btrfs_header_level(buf));
> -                     if (result != (char *)&inline_result)
> -                             kfree(result);
>                       return 1;
>               }
>       } else {
> -             write_extent_buffer(buf, result, 0, csum_size);
> +             write_extent_buffer(buf, result, 0, BTRFS_CSUM_SIZE);
>       }
> -     if (result != (char *)&inline_result)
> -             kfree(result);
>       return 0;
>  }
>  
> -- 
> 2.4.2
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to