On sudden f2fs shutdown, the write pointers of zoned block devices may have
advanced while the f2fs metadata still records the current segments at their
positions before those write operations. After f2fs is remounted, this
inconsistency causes writes to be issued at positions other than the write
pointers, and an "Unaligned write command" error is reported.

To avoid the error, during the mount operation compare each current segment
with the write pointer of the open zone it points to. If the write pointer
position is not aligned with the current segment position, assign a new zone
to the current segment. Also check that the newly assigned zone has its write
pointer at the zone start. If not, reset the write pointer of the zone.

Perform the consistency check during fsync recovery. So as not to lose the
fsync data, do the check after the fsync data has been restored and before the
checkpoint commit, which flushes data at the current segment positions. To
avoid conflicts with kworker's dirty data/node flush, do the fix under
SBI_POR_DOING protection.

Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawas...@wdc.com>
---
 fs/f2fs/f2fs.h     |   1 +
 fs/f2fs/recovery.c |  20 ++++++-
 fs/f2fs/segment.c  | 131 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 150 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 5a888a063c7f..002c417b0a53 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -3155,6 +3155,7 @@ void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk);
 int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
                        unsigned int val, int alloc);
 void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc);
+int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi);
 int f2fs_build_segment_manager(struct f2fs_sb_info *sbi);
 void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi);
 int __init f2fs_create_segment_manager_caches(void);
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 76477f71d4ee..763d5c0951d1 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -723,6 +723,7 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
        int ret = 0;
        unsigned long s_flags = sbi->sb->s_flags;
        bool need_writecp = false;
+       bool fix_curseg_write_pointer = false;
 #ifdef CONFIG_QUOTA
        int quota_enabled;
 #endif
@@ -774,6 +775,8 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
                sbi->sb->s_flags = s_flags;
        }
 skip:
+       fix_curseg_write_pointer = !check_only || list_empty(&inode_list);
+
        destroy_fsync_dnodes(&inode_list, err);
        destroy_fsync_dnodes(&tmp_inode_list, err);
 
@@ -784,9 +787,22 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
        if (err) {
                truncate_inode_pages_final(NODE_MAPPING(sbi));
                truncate_inode_pages_final(META_MAPPING(sbi));
-       } else {
-               clear_sbi_flag(sbi, SBI_POR_DOING);
        }
+
+       /*
+        * If fsync data recovery succeeds or there is no fsync data to recover,
+        * and the f2fs is not read only, check and fix zoned block devices'
+        * write pointer consistency.
+        */
+       if (!err && fix_curseg_write_pointer && !f2fs_readonly(sbi->sb) &&
+                       f2fs_sb_has_blkzoned(sbi)) {
+               err = f2fs_fix_curseg_write_pointer(sbi);
+               ret = err;
+       }
+
+       if (!err)
+               clear_sbi_flag(sbi, SBI_POR_DOING);
+
        mutex_unlock(&sbi->cp_mutex);
 
        /* let's drop all the directory inodes for clean checkpoint */
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 56e81447e2f3..9b6c7ab67b93 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -4368,6 +4368,137 @@ static int sanity_check_curseg(struct f2fs_sb_info *sbi)
        return 0;
 }
 
+#ifdef CONFIG_BLK_DEV_ZONED
+
+/*
+ * Find the zoned block device that holds @zone_blkaddr.
+ *
+ * Devices that are not zoned are skipped. When s_ndevs is 1, a zoned device
+ * is returned without checking its block range. Returns NULL when no zoned
+ * device covers the block address.
+ */
+static struct f2fs_dev_info *get_target_zoned_dev(struct f2fs_sb_info *sbi,
+                                                 block_t zone_blkaddr)
+{
+       int devi;
+
+       for (devi = 0; devi < sbi->s_ndevs; devi++) {
+               struct f2fs_dev_info *dev = &FDEV(devi);
+
+               if (!bdev_is_zoned(dev->bdev))
+                       continue;
+               /* a lone device needs no address range lookup */
+               if (sbi->s_ndevs == 1)
+                       return dev;
+               if (zone_blkaddr >= dev->start_blk &&
+                               zone_blkaddr <= dev->end_blk)
+                       return dev;
+       }
+
+       return NULL;
+}
+
+/*
+ * blkdev_report_zones() callback: copy the reported zone descriptor into the
+ * struct blk_zone buffer handed over through @data. @idx is not used.
+ */
+static int report_one_zone_cb(struct blk_zone *zone, unsigned int idx,
+                             void *data)
+{
+       struct blk_zone *out = data;
+
+       memcpy(out, zone, sizeof(*zone));
+       return 0;
+}
+
+/*
+ * Report the single zone that starts at @zone_blkaddr on @zbd into @zone.
+ *
+ * Returns 0 when exactly one zone was reported. If blkdev_report_zones()
+ * fails, its negative value is returned; if it reports no zone without
+ * failing, -EIO is returned so that the caller never treats an unfilled
+ * @zone as success.
+ */
+static int report_curseg_zone(struct f2fs_sb_info *sbi,
+                             struct f2fs_dev_info *zbd,
+                             block_t zone_blkaddr, struct blk_zone *zone)
+{
+       unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;
+       sector_t zone_sector;
+       int err;
+
+       /* device-relative block address converted to a sector number */
+       zone_sector = (sector_t)(zone_blkaddr - zbd->start_blk)
+               << log_sectors_per_block;
+       err = blkdev_report_zones(zbd->bdev, zone_sector, 1,
+                                 report_one_zone_cb, zone);
+       if (err == 1)
+               return 0;
+
+       f2fs_err(sbi, "Report zone failed: %s errno=(%d)", zbd->path, err);
+       return err < 0 ? err : -EIO;
+}
+
+/*
+ * Check that the current segment of @type sits exactly at the write pointer
+ * of the zone it points to, and assign a new section to it if it does not.
+ *
+ * Nothing is done when the curseg is not on a zoned device or when its zone
+ * is not of type BLK_ZONE_TYPE_SEQWRITE_REQ. After a new section has been
+ * assigned, the new zone is reset if its write pointer is not at zone start.
+ *
+ * Returns 0 on success, or a negative value when the zone report or the zone
+ * reset fails.
+ */
+static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type)
+{
+       struct curseg_info *cs = CURSEG_I(sbi, type);
+       struct f2fs_dev_info *zbd;
+       struct blk_zone zone;
+       unsigned int cs_section, wp_segno, wp_blkoff, wp_sector_off;
+       block_t cs_zone_block, wp_block;
+       unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;
+       int err;
+
+       cs_section = GET_SEC_FROM_SEG(sbi, cs->segno);
+       cs_zone_block = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, cs_section));
+
+       zbd = get_target_zoned_dev(sbi, cs_zone_block);
+       if (!zbd)
+               return 0;
+
+       /* report zone for the sector the curseg points to */
+       err = report_curseg_zone(sbi, zbd, cs_zone_block, &zone);
+       if (err)
+               return err;
+
+       if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ)
+               return 0;
+
+       wp_block = zbd->start_blk + (zone.wp >> log_sectors_per_block);
+       wp_segno = GET_SEGNO(sbi, wp_block);
+       wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno);
+       /* non-zero when the write pointer is not on a block boundary */
+       wp_sector_off = zone.wp & GENMASK(log_sectors_per_block - 1, 0);
+
+       if (cs->segno == wp_segno && cs->next_blkoff == wp_blkoff &&
+               wp_sector_off == 0)
+               return 0;
+
+       f2fs_notice(sbi, "Unaligned curseg[%d] with write pointer: "
+                   "curseg[0x%x,0x%x] wp[0x%x,0x%x]",
+                   type, cs->segno, cs->next_blkoff, wp_segno, wp_blkoff);
+
+       /* logged before the allocation call below */
+       f2fs_notice(sbi, "Assign new section to curseg[%d]: "
+                   "curseg[0x%x,0x%x]", type, cs->segno, cs->next_blkoff);
+       allocate_segment_by_default(sbi, type, true);
+
+       /* check newly assigned zone */
+       cs_section = GET_SEC_FROM_SEG(sbi, cs->segno);
+       cs_zone_block = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, cs_section));
+
+       zbd = get_target_zoned_dev(sbi, cs_zone_block);
+       if (!zbd)
+               return 0;
+
+       err = report_curseg_zone(sbi, zbd, cs_zone_block, &zone);
+       if (err)
+               return err;
+
+       if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ)
+               return 0;
+
+       /* a zone whose write pointer is at its start needs no reset */
+       if (zone.wp == zone.start)
+               return 0;
+
+       f2fs_notice(sbi,
+                   "New zone for curseg[%d] is not yet discarded. "
+                   "Reset the zone: curseg[0x%x,0x%x]",
+                   type, cs->segno, cs->next_blkoff);
+       err = blkdev_zone_mgmt(zbd->bdev, REQ_OP_ZONE_RESET,
+                              zone.start, zone.len, GFP_NOFS);
+       if (err)
+               f2fs_err(sbi, "Reset zone failed: %s (errno=%d)",
+                        zbd->path, err);
+
+       return err;
+}
+
+/*
+ * Run the write pointer consistency fix on every current segment type below
+ * NO_CHECK_TYPE, in ascending order. Stops at the first failure and returns
+ * its error value; returns 0 when every type was handled without error.
+ */
+int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
+{
+       int type;
+       int err = 0;
+
+       for (type = 0; !err && type < NO_CHECK_TYPE; type++)
+               err = fix_curseg_write_pointer(sbi, type);
+
+       return err;
+}
+#else
+/* Stub used when CONFIG_BLK_DEV_ZONED is not set: nothing to fix. */
+int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
+{
+       return 0;
+}
+#endif
+
 /*
  * Update min, max modified time for cost-benefit GC algorithm
  */
-- 
2.23.0



_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

Reply via email to