On 2019/10/28 14:58, Shin'ichiro Kawasaki wrote:
> On sudden f2fs shutdown, write pointers of zoned block devices can have
> advanced while f2fs metadata still keeps the current segments at their
> positions before the write operations. After remounting the f2fs, this
> inconsistency causes write operations that are not at the write pointers,
> and an "Unaligned write command" error is reported.
> 
> To avoid the error, compare current segments with write pointers of open
> zones the current segments point to, during mount operation. If the write
> pointer position is not aligned with the current segment position, assign
> a new zone to the current segments. Also check the newly assigned zone
> has write pointer at zone start. If not, make mount fail and ask users to
> run fsck.
> 
> Perform the consistency check twice. The first check is done during fsync
> recovery: so as not to lose the fsync data, do the check after the fsync
> data gets restored and before the checkpoint commit, which flushes data at
> the current segment positions. The second check is done at the end of
> f2fs_fill_super() to ensure write pointer consistency regardless of
> whether fsync data recovery was executed.
> 
> Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawas...@wdc.com>
> ---
>  fs/f2fs/f2fs.h     |   1 +
>  fs/f2fs/recovery.c |   6 +++
>  fs/f2fs/segment.c  | 127 +++++++++++++++++++++++++++++++++++++++++++++
>  fs/f2fs/super.c    |   8 +++
>  4 files changed, 142 insertions(+)
> 
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 4024790028aa..0216282c5b80 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -3136,6 +3136,7 @@ void f2fs_write_node_summaries(struct f2fs_sb_info 
> *sbi, block_t start_blk);
>  int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
>                       unsigned int val, int alloc);
>  void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control 
> *cpc);
> +int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi, bool check_only);
>  int f2fs_build_segment_manager(struct f2fs_sb_info *sbi);
>  void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi);
>  int __init f2fs_create_segment_manager_caches(void);
> diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
> index 783773e4560d..c75d1cbae4d1 100644
> --- a/fs/f2fs/recovery.c
> +++ b/fs/f2fs/recovery.c
> @@ -795,6 +795,12 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, 
> bool check_only)
>       if (need_writecp) {
>               set_sbi_flag(sbi, SBI_IS_RECOVERED);
>  
> +             /* recover zoned block devices' write pointer consistency */
> +             if (!err && f2fs_sb_has_blkzoned(sbi)) {
> +                     err = f2fs_fix_curseg_write_pointer(sbi, false);

Can we check and reset the current segments under SBI_POR_DOING's protection?
Once the SBI_POR_DOING flag is cleared, the kworker is able to flush dirty
data/node, which may trigger an unaligned write command if the write pointer
is inconsistent.

Thanks,

> +                     ret = err;
> +             }
> +
>               if (!err) {
>                       struct cp_control cpc = {
>                               .reason = CP_RECOVERY,
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 808709581481..2b6e637dd6d3 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -4331,6 +4331,133 @@ static int sanity_check_curseg(struct f2fs_sb_info 
> *sbi)
>       return 0;
>  }
>  
> +#ifdef CONFIG_BLK_DEV_ZONED
> +
> +static struct f2fs_dev_info *get_target_zoned_dev(struct f2fs_sb_info *sbi,
> +                                               block_t zone_blkaddr)
> +{
> +     int i;
> +
> +     for (i = 0; i < sbi->s_ndevs; i++) {
> +             if (!bdev_is_zoned(FDEV(i).bdev))
> +                     continue;
> +             if (sbi->s_ndevs == 1 || (FDEV(i).start_blk <= zone_blkaddr &&
> +                                       zone_blkaddr <= FDEV(i).end_blk))
> +                     return &FDEV(i);
> +     }
> +
> +     return NULL;
> +}
> +
> +static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type,
> +                                 bool check_only)
> +{
> +     struct curseg_info *cs = CURSEG_I(sbi, type);
> +     struct f2fs_dev_info *zbd;
> +     struct blk_zone zone;
> +     unsigned int cs_section, wp_segno, wp_blkoff, nr_zones, wp_sector_off;
> +     block_t cs_zone_block, wp_block, cs_block;
> +     unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;
> +     sector_t zone_sector;
> +     int err;
> +
> +     cs_section = GET_SEC_FROM_SEG(sbi, cs->segno);
> +     cs_zone_block = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, cs_section));
> +     cs_block = START_BLOCK(sbi, cs->segno) + cs->next_blkoff;
> +
> +     zbd = get_target_zoned_dev(sbi, cs_zone_block);
> +     if (!zbd)
> +             return 0;
> +
> +     /* report zone for the sector the curseg points to */
> +     zone_sector = (sector_t)(cs_zone_block - zbd->start_blk)
> +             << log_sectors_per_block;
> +     nr_zones = 1;
> +     err = blkdev_report_zones(zbd->bdev, zone_sector, &zone, &nr_zones);
> +     if (err) {
> +             f2fs_err(sbi, "Report zone failed: %s errno=(%d)",
> +                      zbd->path, err);
> +             return err;
> +     }
> +
> +     if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ)
> +             return 0;
> +
> +     wp_block = zbd->start_blk + (zone.wp >> log_sectors_per_block);
> +     wp_segno = GET_SEGNO(sbi, wp_block);
> +     wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno);
> +     wp_sector_off = zone.wp & GENMASK(log_sectors_per_block - 1, 0);
> +
> +     if (cs->segno == wp_segno && cs->next_blkoff == wp_blkoff &&
> +         wp_sector_off == 0)
> +             return 0;
> +
> +     f2fs_notice(sbi, "Unaligned curseg[%d] with write pointer: "
> +                 "curseg[0x%x,0x%x] wp[0x%x,0x%x]",
> +                 type, cs->segno, cs->next_blkoff, wp_segno, wp_blkoff);
> +
> +     /* if check_only is specified, return error without fix */
> +     if (check_only)
> +             return -EIO;
> +
> +     f2fs_notice(sbi, "Assign new section to curseg[%d]: "
> +                 "curseg[0x%x,0x%x]", type, cs->segno, cs->next_blkoff);
> +     allocate_segment_by_default(sbi, type, true);
> +
> +     /* check newly assigned zone */
> +     cs_section = GET_SEC_FROM_SEG(sbi, cs->segno);
> +     cs_zone_block = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, cs_section));
> +
> +     zbd = get_target_zoned_dev(sbi, cs_zone_block);
> +     if (!zbd)
> +             return 0;
> +
> +     zone_sector = (sector_t)(cs_zone_block - zbd->start_blk)
> +             << log_sectors_per_block;
> +     nr_zones = 1;
> +     err = blkdev_report_zones(zbd->bdev, zone_sector, &zone, &nr_zones);
> +     if (err) {
> +             f2fs_err(sbi, "Report zone failed: %s errno=(%d)",
> +                      zbd->path, err);
> +             return err;
> +     }
> +
> +     if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ)
> +             return 0;
> +
> +     if (zone.wp != zone.start) {
> +             f2fs_notice(sbi,
> +                         "New section for curseg[%d] is not empty, "
> +                         "run fsck to fix: curseg[0x%x,0x%x]",
> +                         type, cs->segno, cs->next_blkoff);
> +             __set_inuse(sbi, GET_SEGNO(sbi, cs_zone_block));
> +             f2fs_stop_checkpoint(sbi, true);
> +             set_sbi_flag(sbi, SBI_NEED_FSCK);
> +             return -EINVAL;
> +     }
> +
> +     return 0;
> +}
> +
> +int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi, bool check_only)
> +{
> +     int i, ret;
> +
> +     for (i = 0; i < NO_CHECK_TYPE; i++) {
> +             ret = fix_curseg_write_pointer(sbi, i, check_only);
> +             if (ret)
> +                     return ret;
> +     }
> +
> +     return 0;
> +}
> +#else
> +int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi, bool check_only)
> +{
> +     return 0;
> +}
> +#endif
> +
>  /*
>   * Update min, max modified time for cost-benefit GC algorithm
>   */
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index 1443cee15863..ebd0ae02a260 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -3525,6 +3525,14 @@ static int f2fs_fill_super(struct super_block *sb, 
> void *data, int silent)
>                       goto free_meta;
>               }
>       }
> +
> +     /* check zoned block devices' write pointer consistency */
> +     if (f2fs_sb_has_blkzoned(sbi)) {
> +             err = f2fs_fix_curseg_write_pointer(sbi, f2fs_readonly(sb));
> +             if (err)
> +                     goto free_meta;
> +     }
> +
>  reset_checkpoint:
>       /* f2fs_recover_fsync_data() cleared this already */
>       clear_sbi_flag(sbi, SBI_POR_DOING);
> 


_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

Reply via email to