On Dec 09, 2019 / 17:36, Chao Yu wrote:
> On 2019/12/9 15:28, Shinichiro Kawasaki wrote:
> > On Dec 09, 2019 / 10:04, Chao Yu wrote:
> >> On 2019/12/2 17:40, Shin'ichiro Kawasaki wrote:
> >>> To catch f2fs bugs in write pointer handling code for zoned block
> >>> devices, check write pointers of non-open zones that current segments do
> >>> not point to. Do this check at mount time, after the fsync data recovery
> >>> and current segments' write pointer consistency fix. Or when fsync data
> >>> recovery is disabled by mount option, do the check when there is no fsync
> >>> data.
> >>>
> >>> Check two items comparing write pointers with valid block maps in SIT.
> >>> The first item is check for zones with no valid blocks. When there is no
> >>> valid blocks in a zone, the write pointer should be at the start of the
> >>> zone. If not, next write operation to the zone will cause unaligned write
> >>> error. If write pointer is not at the zone start, reset the write pointer
> >>> to place at the zone start.
> >>>
> >>> The second item is check between the write pointer position and the last
> >>> valid block in the zone. It is unexpected that the last valid block
> >>> position is beyond the write pointer. In such a case, report as a bug.
> >>> Fix is not required for such zone, because the zone is not selected for
> >>> next write operation until the zone get discarded.
> >>>
> >>> Signed-off-by: Shin'ichiro Kawasaki <[email protected]>
> >>> ---
> >>>  fs/f2fs/f2fs.h    |   1 +
> >>>  fs/f2fs/segment.c | 126 ++++++++++++++++++++++++++++++++++++++++++++++
> >>>  fs/f2fs/super.c   |  11 ++++
> >>>  3 files changed, 138 insertions(+)
> >>>
> >>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> >>> index 002c417b0a53..23a84d7f17b8 100644
> >>> --- a/fs/f2fs/f2fs.h
> >>> +++ b/fs/f2fs/f2fs.h
> >>> @@ -3156,6 +3156,7 @@ int f2fs_lookup_journal_in_cursum(struct 
> >>> f2fs_journal *journal, int type,
> >>>                   unsigned int val, int alloc);
> >>>  void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control 
> >>> *cpc);
> >>>  int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi);
> >>> +int f2fs_check_write_pointer(struct f2fs_sb_info *sbi);
> >>>  int f2fs_build_segment_manager(struct f2fs_sb_info *sbi);
> >>>  void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi);
> >>>  int __init f2fs_create_segment_manager_caches(void);
> >>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> >>> index 9b6c7ab67b93..48903b7a9d25 100644
> >>> --- a/fs/f2fs/segment.c
> >>> +++ b/fs/f2fs/segment.c
> >>> @@ -4370,6 +4370,90 @@ static int sanity_check_curseg(struct f2fs_sb_info 
> >>> *sbi)
> >>>  
> >>>  #ifdef CONFIG_BLK_DEV_ZONED
> >>>  
> >>> +static int check_zone_write_pointer(struct f2fs_sb_info *sbi,
> >>> +                             struct f2fs_dev_info *fdev,
> >>> +                             struct blk_zone *zone)
> >>> +{
> >>> + unsigned int wp_segno, wp_blkoff, zone_secno, zone_segno, segno;
> >>> + block_t zone_block, wp_block, last_valid_block;
> >>> + unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;
> >>> + int i, s, b, ret;
> >>> + struct seg_entry *se;
> >>> +
> >>> + if (zone->type != BLK_ZONE_TYPE_SEQWRITE_REQ)
> >>> +         return 0;
> >>> +
> >>> + wp_block = fdev->start_blk + (zone->wp >> log_sectors_per_block);
> >>> + wp_segno = GET_SEGNO(sbi, wp_block);
> >>> + wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno);
> >>> + zone_block = fdev->start_blk + (zone->start >> log_sectors_per_block);
> >>> + zone_segno = GET_SEGNO(sbi, zone_block);
> >>> + zone_secno = GET_SEC_FROM_SEG(sbi, zone_segno);
> >>> +
> >>> + if (zone_segno >= MAIN_SEGS(sbi))
> >>> +         return 0;
> >>> +
> >>> + /*
> >>> +  * Skip check of zones cursegs point to, since
> >>> +  * fix_curseg_write_pointer() checks them.
> >>> +  */
> >>> + for (i = 0; i < NO_CHECK_TYPE; i++)
> >>> +         if (zone_secno == GET_SEC_FROM_SEG(sbi,
> >>> +                                            CURSEG_I(sbi, i)->segno))
> >>> +                 return 0;
> >>> +
> >>> + /*
> >>> +  * Get last valid block of the zone.
> >>> +  */
> >>> + last_valid_block = zone_block - 1;
> >>> + for (s = sbi->segs_per_sec - 1; s >= 0; s--) {
> >>> +         segno = zone_segno + s;
> >>> +         se = get_seg_entry(sbi, segno);
> >>> +         for (b = sbi->blocks_per_seg - 1; b >= 0; b--)
> >>> +                 if (f2fs_test_bit(b, se->cur_valid_map)) {
> >>> +                         last_valid_block = START_BLOCK(sbi, segno) + b;
> >>> +                         break;
> >>> +                 }
> >>> +         if (last_valid_block >= zone_block)
> >>> +                 break;
> >>> + }
> >>> +
> >>> + /*
> >>> +  * If last valid block is beyond the write pointer, report the
> >>> +  * inconsistency. This inconsistency does not cause write error
> >>> +  * because the zone will not be selected for write operation until
> >>> +  * it get discarded. Just report it.
> >>> +  */
> >>> + if (last_valid_block >= wp_block) {
> >>> +         f2fs_notice(sbi, "Valid block beyond write pointer: "
> >>> +                     "valid block[0x%x,0x%x] wp[0x%x,0x%x]",
> >>> +                     GET_SEGNO(sbi, last_valid_block),
> >>> +                     GET_BLKOFF_FROM_SEG0(sbi, last_valid_block),
> >>> +                     wp_segno, wp_blkoff);
> >>> +         return 0;
> >>> + }
> >>> +
> >>> + /*
> >>> +  * If there is no valid block in the zone and if write pointer is
> >>> +  * not at zone start, reset the write pointer.
> >>> +  */
> >>> + if (last_valid_block + 1 == zone_block && zone->wp != zone->start) {
> >>> +         f2fs_notice(sbi,
> >>> +                     "Zone without valid block has non-zero write "
> >>> +                     "pointer. Reset the write pointer: wp[0x%x,0x%x]",
> >>> +                     wp_segno, wp_blkoff);
> >>> +         ret = blkdev_zone_mgmt(fdev->bdev, REQ_OP_ZONE_RESET,
> >>> +                                zone->start, zone->len, GFP_NOFS);
> >>
> >> Should use __f2fs_issue_discard_zone() to cover multi-device support?
> > 
> > Yes, __f2fs_issue_discard_zone() for each device adds more check and I think
> > it is safer for multi-device case. Will re-post the patch with you suggest.
> > 
> >>
> >>> +         if (ret) {
> >>> +                 f2fs_notice(sbi, "Reset zone failed: %s (errno=%d)",
> >>> +                             fdev->path, ret);
> >>> +                 return ret;
> >>> +         }
> >>
> >> Just out of curiosity, how long will RESET command take normally?
> > 
> > Though I don't have accurate numbers, it takes around 10 milliseconds to
> > complete one RESET command for one zone with my system. Assuming the number
> 
> Okay, as RESET command cover large-sized area, I guess we can afford such 
> delay
> during mount(), what I'm concerning is we will trigger synchoronous RESET
> command during checkpoint(), as we will unlock cp_rwsem after handling RESET
> command, so it potentially hangs fs operations long time once there is many
> pending RESET commands, maybe we can improve RESET command handling like we 
> did
> for discard, anyway it's another topic.

Thank you for sharing your thoughts. I keep this in mind.

I have sent out v5 for review. Of note is that I replaced blkdev_zone_mgmt()
with __f2fs_issue_discard_zone()in the 2nd patch as well as the 1st patch. Then
I didn't add your reviewed-by tag in the 1st patch.

> 
> Thanks,
> 
> > of zones to discard here is small, I think the additional overhead is small
> > enough to ignore.
> > 
> > Thanks!
> > 
> >>
> >> Thanks,
> >>
> >>> + }
> >>> +
> >>> + return 0;
> >>> +}
> >>> +
> >>>  static struct f2fs_dev_info *get_target_zoned_dev(struct f2fs_sb_info 
> >>> *sbi,
> >>>                                             block_t zone_blkaddr)
> >>>  {
> >>> @@ -4442,6 +4526,10 @@ static int fix_curseg_write_pointer(struct 
> >>> f2fs_sb_info *sbi, int type)
> >>>               "curseg[0x%x,0x%x]", type, cs->segno, cs->next_blkoff);
> >>>   allocate_segment_by_default(sbi, type, true);
> >>>  
> >>> + /* check consistency of the zone curseg pointed to */
> >>> + if (check_zone_write_pointer(sbi, zbd, &zone))
> >>> +         return -EIO;
> >>> +
> >>>   /* check newly assigned zone */
> >>>   cs_section = GET_SEC_FROM_SEG(sbi, cs->segno);
> >>>   cs_zone_block = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, cs_section));
> >>> @@ -4492,11 +4580,49 @@ int f2fs_fix_curseg_write_pointer(struct 
> >>> f2fs_sb_info *sbi)
> >>>  
> >>>   return 0;
> >>>  }
> >>> +
> >>> +struct check_zone_write_pointer_args {
> >>> + struct f2fs_sb_info *sbi;
> >>> + struct f2fs_dev_info *fdev;
> >>> +};
> >>> +
> >>> +static int check_zone_write_pointer_cb(struct blk_zone *zone, unsigned 
> >>> int idx,
> >>> +                               void *data) {
> >>> + struct check_zone_write_pointer_args *args;
> >>> + args = (struct check_zone_write_pointer_args *)data;
> >>> +
> >>> + return check_zone_write_pointer(args->sbi, args->fdev, zone);
> >>> +}
> >>> +
> >>> +int f2fs_check_write_pointer(struct f2fs_sb_info *sbi)
> >>> +{
> >>> + int i, ret;
> >>> + struct check_zone_write_pointer_args args;
> >>> +
> >>> + for (i = 0; i < sbi->s_ndevs; i++) {
> >>> +         if (!bdev_is_zoned(FDEV(i).bdev))
> >>> +                 continue;
> >>> +
> >>> +         args.sbi = sbi;
> >>> +         args.fdev = &FDEV(i);
> >>> +         ret = blkdev_report_zones(FDEV(i).bdev, 0, BLK_ALL_ZONES,
> >>> +                                   check_zone_write_pointer_cb, &args);
> >>> +         if (ret < 0)
> >>> +                 return ret;
> >>> + }
> >>> +
> >>> + return 0;
> >>> +}
> >>>  #else
> >>>  int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
> >>>  {
> >>>   return 0;
> >>>  }
> >>> +
> >>> +int f2fs_check_write_pointer(struct f2fs_sb_info *sbi)
> >>> +{
> >>> + return 0;
> >>> +}
> >>>  #endif
> >>>  
> >>>  /*
> >>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> >>> index 5111e1ffe58a..755ad57c795b 100644
> >>> --- a/fs/f2fs/super.c
> >>> +++ b/fs/f2fs/super.c
> >>> @@ -3544,6 +3544,17 @@ static int f2fs_fill_super(struct super_block *sb, 
> >>> void *data, int silent)
> >>>                   goto free_meta;
> >>>           }
> >>>   }
> >>> +
> >>> + /*
> >>> +  * If the f2fs is not readonly and fsync data recovery succeeds,
> >>> +  * check zoned block devices' write pointer consistency.
> >>> +  */
> >>> + if (!err && !f2fs_readonly(sb) && f2fs_sb_has_blkzoned(sbi)) {
> >>> +         err = f2fs_check_write_pointer(sbi);
> >>> +         if (err)
> >>> +                 goto free_meta;
> >>> + }
> >>> +
> >>>  reset_checkpoint:
> >>>   /* f2fs_recover_fsync_data() cleared this already */
> >>>   clear_sbi_flag(sbi, SBI_POR_DOING);
> >>>
> > 
> > --
> > Best Regards,
> > Shin'ichiro Kawasaki.
> > 

--
Best Regards,
Shin'ichiro Kawasaki

_______________________________________________
Linux-f2fs-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

Reply via email to