On 2019-8-29 14:35, Shin'ichiro Kawasaki wrote: > On sudden f2fs shutdown, zoned block device status and f2fs current > segment positions in meta data can be inconsistent. When f2fs shutdown > happens before write operations completes, write pointers of zoned block > devices can go further but f2fs meta data keeps current segments at > positions before the write operations. After remounting the f2fs, the > inconsistency causes write operations not at write pointers and > "Unaligned write command" error is reported. This error was observed when > xfstests test case generic/388 was run with f2fs on a zoned block device. > > To avoid the error, have f2fs.fsck check consistency between each current > segment's position and the write pointer of the zone the current segment > points to. If the write pointer goes advance from the current segment, > fix the current segment position setting at same as the write pointer > position. In case the write pointer is behind the current segment, write > zero data at the write pointer position to make write pointer position at > same as the current segment. > > When inconsistencies are found, turn on c.bug_on flag in fsck_verify() to > ask users to fix them or not. When inconsistencies get fixed, turn on > 'force' flag in fsck_verify() to enforce fixes in following checks. This > position fix is done at the beginning of do_fsck() function so that other > checks reflect the current segment modification. > > Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawas...@wdc.com> > Reviewed-by: Chao Yu <yuch...@huawei.com> > --- > fsck/fsck.c | 131 ++++++++++++++++++++++++++++++++++++++++++++++++++++ > fsck/fsck.h | 3 ++ > fsck/main.c | 2 + > 3 files changed, 136 insertions(+) > > diff --git a/fsck/fsck.c b/fsck/fsck.c > index 8953ca1..f45ca39 100644 > --- a/fsck/fsck.c > +++ b/fsck/fsck.c > @@ -2574,6 +2574,123 @@ out: > return cnt; > } > > +struct write_pointer_check_data { > + struct f2fs_sb_info *sbi; > + struct device_info *dev; > +}; > + > +static int fsck_chk_write_pointer(int i, struct blk_zone *blkz, void *opaque) > +{ > + struct write_pointer_check_data *wpd = opaque; > + struct f2fs_sb_info *sbi = wpd->sbi; > + struct device_info *dev = wpd->dev; > + struct f2fs_fsck *fsck = F2FS_FSCK(sbi); > + block_t zone_block, wp_block, wp_blkoff, cs_block, b; > + unsigned int zone_segno, wp_segno; > + struct seg_entry *se; > + struct curseg_info *cs; > + int cs_index, ret; > + int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT; > + unsigned int segs_per_zone = sbi->segs_per_sec * sbi->secs_per_zone; > + void *zero_blk; > + > + if (blk_zone_conv(blkz)) > + return 0; > + > + zone_block = dev->start_blkaddr > + + (blk_zone_sector(blkz) >> log_sectors_per_block); > + zone_segno = GET_SEGNO(sbi, zone_block); > + wp_block = dev->start_blkaddr > + + (blk_zone_wp_sector(blkz) >> log_sectors_per_block); > + wp_segno = GET_SEGNO(sbi, wp_block); > + wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno); > + > + /* find the curseg which points to the zone */ > + for (cs_index = 0; cs_index < NO_CHECK_TYPE; cs_index++) { > + cs = &SM_I(sbi)->curseg_array[cs_index]; > + if (zone_segno <= cs->segno && > + cs->segno < zone_segno + segs_per_zone) > + break; > + } > + > + if (cs_index >= NR_CURSEG_TYPE) > + return 0; > + > + /* check write pointer consistency with the curseg in the zone */ > + cs_block = START_BLOCK(sbi, cs->segno) + cs->next_blkoff; > + if (wp_block == cs_block) > + return 0; > + > + if (!c.fix_on) { > + MSG(0, "Inconsistent write pointer: " > + "curseg %d[0x%x,0x%x] wp[0x%x,0x%x]\n", > + cs_index, cs->segno, cs->next_blkoff, wp_segno, wp_blkoff); > + fsck->chk.wp_inconsistent_zones++; > + return 0; > + } > + > + /* > + * If the curseg is in advance from the write pointer, write zero to > + * move the write pointer forward to the same position as the curseg. > + */ > + if (wp_block < cs_block) { > + ret = 0; > + zero_blk = calloc(BLOCK_SZ, 1); > + if (!zero_blk) > + return -EINVAL; > + > + FIX_MSG("Advance write pointer to match with curseg %d: " > + "[0x%x,0x%x]->[0x%x,0x%x]", > + cs_index, wp_segno, wp_blkoff, > + cs->segno, cs->next_blkoff); > + for (b = wp_block; b < cs_block && !ret; b++) > + ret = dev_write_block(zero_blk, b); > + > + fsck->chk.wp_fixed_zones++; > + free(zero_blk); > + return ret; > + } > + > + /* > + * If the write pointer is in advance from the curseg, modify the > + * curseg position to be same as the write pointer. > + */ > + FIX_MSG("Advance curseg %d: [0x%x,0x%x]->[0x%x,0x%x]", > + cs_index, cs->segno, cs->next_blkoff, wp_segno, wp_blkoff); > + se = get_seg_entry(sbi, wp_segno); > + se->type = cs_index; > + cs->segno = wp_segno; > + cs->next_blkoff = wp_blkoff;
I have one more question, if current zone is closed, will wp_blkoff equal to segment size (wp_block equal to zone size)? As you know, .next_blkoff should never equal to segment size. Thanks, > + fsck->chk.wp_fixed_zones++; > + > + return 0; > +} > + > +void fsck_chk_write_pointers(struct f2fs_sb_info *sbi) > +{ > + unsigned int i; > + struct f2fs_fsck *fsck = F2FS_FSCK(sbi); > + struct write_pointer_check_data wpd = { sbi, NULL }; > + > + if (c.zoned_model != F2FS_ZONED_HM) > + return; > + > + for (i = 0; i < MAX_DEVICES; i++) { > + if (!c.devices[i].path) > + break; > + > + wpd.dev = c.devices + i; > + if (f2fs_report_zones(i, fsck_chk_write_pointer, &wpd)) { > + printf("[FSCK] Write pointer check failed: %s\n", > + c.devices[i].path); > + return; > + } > + } > + > + if (fsck->chk.wp_fixed_zones && c.fix_on) > + write_curseg_info(sbi); > +} > + > int fsck_chk_curseg_info(struct f2fs_sb_info *sbi) > { > struct curseg_info *curseg; > @@ -2624,6 +2741,20 @@ int fsck_verify(struct f2fs_sb_info *sbi) > > printf("\n"); > > + if (c.zoned_model == F2FS_ZONED_HM) { > + printf("[FSCK] Write pointers consistency "); > + if (fsck->chk.wp_inconsistent_zones == 0x0) { > + printf(" [Ok..]\n"); > + } else { > + printf(" [Fail] [0x%x]\n", > + fsck->chk.wp_inconsistent_zones); > + c.bug_on = 1; > + } > + > + if (fsck->chk.wp_fixed_zones && c.fix_on) > + force = 1; > + } > + > if (c.feature & cpu_to_le32(F2FS_FEATURE_LOST_FOUND)) { > for (i = 0; i < fsck->nr_nat_entries; i++) > if (f2fs_test_bit(i, fsck->nat_area_bitmap) != 0) > diff --git a/fsck/fsck.h b/fsck/fsck.h > index d38e8de..aa3dbe7 100644 > --- a/fsck/fsck.h > +++ b/fsck/fsck.h > @@ -80,6 +80,8 @@ struct f2fs_fsck { > u32 multi_hard_link_files; > u64 sit_valid_blocks; > u32 sit_free_segs; > + u32 wp_fixed_zones; > + u32 wp_inconsistent_zones; > } chk; > > struct hard_link_node *hard_link_list_head; > @@ -156,6 +158,7 @@ int fsck_chk_inline_dentries(struct f2fs_sb_info *, > struct f2fs_node *, > struct child_info *); > void fsck_chk_checkpoint(struct f2fs_sb_info *sbi); > int fsck_chk_meta(struct f2fs_sb_info *sbi); > +void fsck_chk_write_pointers(struct f2fs_sb_info *); > int fsck_chk_curseg_info(struct f2fs_sb_info *); > void pretty_print_filename(const u8 *raw_name, u32 len, > char out[F2FS_PRINT_NAMELEN], int enc_name); > diff --git a/fsck/main.c b/fsck/main.c > index 9aca024..4b4a789 100644 > --- a/fsck/main.c > +++ b/fsck/main.c > @@ -584,6 +584,8 @@ static void do_fsck(struct f2fs_sb_info *sbi) > > print_cp_state(flag); > > + fsck_chk_write_pointers(sbi); > + > fsck_chk_curseg_info(sbi); > > if (!c.fix_on && !c.bug_on) { > _______________________________________________ Linux-f2fs-devel mailing list Linux-f2fs-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel