On sudden f2fs shutdown, the write pointers of zoned block devices can
have advanced while the f2fs metadata still keeps the current segments at
their positions from before the write operations. After remounting f2fs,
this inconsistency causes writes that do not start at the write pointers,
and an "Unaligned write command" error is reported.

To avoid the error, during the mount operation compare each current
segment with the write pointer of the open zone it points to. If the write
pointer position does not match the current segment position, assign a new
zone to the current segment. Also check that the newly assigned zone has
its write pointer at the zone start. If not, make the mount fail and ask
the user to run fsck.

Perform the consistency check twice. The first check is done during fsync
recovery: so as not to lose the fsync data, do the check after the fsync
data is restored and before the checkpoint commit, which flushes data at
the current segment positions. The second check is done at the end of
f2fs_fill_super() to ensure write pointer consistency regardless of
whether fsync data recovery was executed.

Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawas...@wdc.com>
---
 fs/f2fs/f2fs.h     |   1 +
 fs/f2fs/recovery.c |   6 +++
 fs/f2fs/segment.c  | 127 +++++++++++++++++++++++++++++++++++++++++++++
 fs/f2fs/super.c    |   8 +++
 4 files changed, 142 insertions(+)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 4024790028aa..0216282c5b80 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -3136,6 +3136,7 @@ void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk);
 int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
                        unsigned int val, int alloc);
 void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc);
+int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi, bool check_only);
 int f2fs_build_segment_manager(struct f2fs_sb_info *sbi);
 void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi);
 int __init f2fs_create_segment_manager_caches(void);
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 783773e4560d..c75d1cbae4d1 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -795,6 +795,12 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
        if (need_writecp) {
                set_sbi_flag(sbi, SBI_IS_RECOVERED);
 
+               /* recover zoned block devices' write pointer consistency */
+               if (!err && f2fs_sb_has_blkzoned(sbi)) {
+                       err = f2fs_fix_curseg_write_pointer(sbi, false);
+                       ret = err;
+               }
+
                if (!err) {
                        struct cp_control cpc = {
                                .reason = CP_RECOVERY,
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 808709581481..2b6e637dd6d3 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -4331,6 +4331,133 @@ static int sanity_check_curseg(struct f2fs_sb_info *sbi)
        return 0;
 }
 
+#ifdef CONFIG_BLK_DEV_ZONED
+
+/* Find the zoned device that holds @zone_blkaddr; NULL if there is none. */
+static struct f2fs_dev_info *get_target_zoned_dev(struct f2fs_sb_info *sbi,
+                                                 block_t zone_blkaddr)
+{
+       int n;
+
+       for (n = 0; n < sbi->s_ndevs; n++) {
+               /* a lone device needs no address range match */
+               if (bdev_is_zoned(FDEV(n).bdev) && (sbi->s_ndevs == 1 ||
+                   (FDEV(n).start_blk <= zone_blkaddr &&
+                    zone_blkaddr <= FDEV(n).end_blk)))
+                       return &FDEV(n);
+       }
+       return NULL;
+}
+
+/* Check curseg @type against its zone write pointer; fix unless check_only. */
+static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type,
+                                   bool check_only)
+{
+       struct curseg_info *cs = CURSEG_I(sbi, type);
+       struct f2fs_dev_info *zbd;
+       struct blk_zone zone;
+       unsigned int cs_section, wp_segno, wp_blkoff, nr_zones, wp_sector_off;
+       block_t cs_zone_block, wp_block;
+       unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;
+       sector_t zone_sector;
+       int err;
+
+       cs_section = GET_SEC_FROM_SEG(sbi, cs->segno);
+       cs_zone_block = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, cs_section));
+
+       zbd = get_target_zoned_dev(sbi, cs_zone_block);
+       if (!zbd)
+               return 0; /* no zoned device found for this curseg */
+
+       /* report zone for the sector the curseg points to */
+       zone_sector = (sector_t)(cs_zone_block - zbd->start_blk)
+               << log_sectors_per_block;
+       nr_zones = 1;
+       err = blkdev_report_zones(zbd->bdev, zone_sector, &zone, &nr_zones);
+       if (err || !nr_zones) { /* success may still report no zone */
+               f2fs_err(sbi, "Report zone failed: %s errno=(%d)",
+                        zbd->path, err ? err : -EIO);
+               return err ? err : -EIO;
+       }
+
+       if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ)
+               return 0; /* not a sequential-write-required zone */
+
+       wp_block = zbd->start_blk + (zone.wp >> log_sectors_per_block);
+       wp_segno = GET_SEGNO(sbi, wp_block);
+       wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno);
+       wp_sector_off = zone.wp & GENMASK(log_sectors_per_block - 1, 0);
+
+       if (cs->segno == wp_segno && cs->next_blkoff == wp_blkoff &&
+           wp_sector_off == 0)
+               return 0; /* curseg already matches the write pointer */
+
+       f2fs_notice(sbi, "Unaligned curseg[%d] with write pointer: "
+                   "curseg[0x%x,0x%x] wp[0x%x,0x%x]",
+                   type, cs->segno, cs->next_blkoff, wp_segno, wp_blkoff);
+
+       /* if check_only is specified, return error without fix */
+       if (check_only)
+               return -EIO;
+
+       f2fs_notice(sbi, "Assign new section to curseg[%d]: "
+                   "curseg[0x%x,0x%x]", type, cs->segno, cs->next_blkoff);
+       allocate_segment_by_default(sbi, type, true);
+
+       /* check newly assigned zone */
+       cs_section = GET_SEC_FROM_SEG(sbi, cs->segno);
+       cs_zone_block = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, cs_section));
+
+       zbd = get_target_zoned_dev(sbi, cs_zone_block);
+       if (!zbd)
+               return 0;
+
+       zone_sector = (sector_t)(cs_zone_block - zbd->start_blk)
+               << log_sectors_per_block;
+       nr_zones = 1;
+       err = blkdev_report_zones(zbd->bdev, zone_sector, &zone, &nr_zones);
+       if (err || !nr_zones) { /* success may still report no zone */
+               f2fs_err(sbi, "Report zone failed: %s errno=(%d)",
+                        zbd->path, err ? err : -EIO);
+               return err ? err : -EIO;
+       }
+
+       if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ)
+               return 0;
+
+       if (zone.wp != zone.start) { /* new zone must have wp at its start */
+               f2fs_notice(sbi,
+                           "New section for curseg[%d] is not empty, "
+                           "run fsck to fix: curseg[0x%x,0x%x]",
+                           type, cs->segno, cs->next_blkoff);
+               __set_inuse(sbi, GET_SEGNO(sbi, cs_zone_block));
+               f2fs_stop_checkpoint(sbi, true);
+               set_sbi_flag(sbi, SBI_NEED_FSCK);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+/* Check every curseg type in turn and stop at the first one that fails. */
+int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi, bool check_only)
+{
+       int type = 0;
+       int err = 0;
+
+       /* a non-zero result ends the walk and is handed back to the caller */
+       while (type < NO_CHECK_TYPE && !err)
+               err = fix_curseg_write_pointer(sbi, type++, check_only);
+
+       return err;
+}
+#else
+int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi, bool check_only)
+{
+       return 0; /* no-op stub when CONFIG_BLK_DEV_ZONED is not defined */
+}
+#endif
+
 /*
  * Update min, max modified time for cost-benefit GC algorithm
  */
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 1443cee15863..ebd0ae02a260 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -3525,6 +3525,14 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
                        goto free_meta;
                }
        }
+
+       /* check zoned block devices' write pointer consistency */
+       if (f2fs_sb_has_blkzoned(sbi)) {
+               err = f2fs_fix_curseg_write_pointer(sbi, f2fs_readonly(sb));
+               if (err)
+                       goto free_meta;
+       }
+
 reset_checkpoint:
        /* f2fs_recover_fsync_data() cleared this already */
        clear_sbi_flag(sbi, SBI_POR_DOING);
-- 
2.21.0



_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

Reply via email to