This patch introduces a new sysfs entry /sys/fs/f2fs/<disk>/flush_policy
in order to tune performance of f2fs data flush flow.

For example, checkpoint uses REQ_FUA to persist CP metadata. However, some
devices perform poorly on REQ_FUA commands, which can cause checkpoint to be
blocked for a long time. With this sysfs entry, we provide an option to use
the REQ_PREFLUSH command instead of REQ_FUA during checkpoint, which helps
mitigate long checkpoint latency.

Signed-off-by: Chao Yu <[email protected]>
---
v2:
- use __submit_flush_wait() instead of f2fs_flush_device_cache() to
just flush primary device cache
 Documentation/ABI/testing/sysfs-fs-f2fs |  9 +++++++++
 fs/f2fs/checkpoint.c                    | 11 ++++++++++-
 fs/f2fs/f2fs.h                          |  7 +++++++
 fs/f2fs/sysfs.c                         |  9 +++++++++
 4 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index bc0e7fefc39d..2fedb44b713b 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -883,3 +883,12 @@ Date:              June 2025
 Contact:       "Daeho Jeong" <[email protected]>
 Description:   Control GC algorithm for boost GC. 0: cost benefit, 1: greedy
                Default: 1
+
+What:          /sys/fs/f2fs/<disk>/flush_policy
+Date:          July 2025
+Contact:       "Chao Yu" <[email protected]>
+Description:   Devices may perform differently for the same flush method; this node
+               can be used to tune performance by selecting different flush methods.
+
+               policy value            description
+               0x00000001              Use preflush instead of fua during checkpoint
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index db3831f7f2f5..fafa2156732d 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1419,7 +1419,9 @@ static void commit_checkpoint(struct f2fs_sb_info *sbi,
        f2fs_folio_put(folio, false);
 
        /* submit checkpoint (with barrier if NOBARRIER is not set) */
-       f2fs_submit_merged_write(sbi, META_FLUSH);
+       f2fs_submit_merged_write(sbi,
+               sbi->flush_policy & BIT(FLUSH_POLICY_CP_NO_FUA) ?
+               META : META_FLUSH);
 }
 
 static inline u64 get_sectors_written(struct block_device *bdev)
@@ -1594,6 +1596,13 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 
        __set_cp_next_pack(sbi);
 
+       /* flush device cache to make sure last cp pack can be persisted */
+       if (sbi->flush_policy & BIT(FLUSH_POLICY_CP_NO_FUA)) {
+               err = __submit_flush_wait(sbi, sbi->sb->s_bdev);
+               if (err)
+                       return err;
+       }
+
        /*
         * redirty superblock if metadata like node page or inode cache is
         * updated during writing checkpoint.
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 46be7560548c..5db5eba4cbd7 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1594,6 +1594,11 @@ struct decompress_io_ctx {
 #define MAX_COMPRESS_LOG_SIZE          8
 #define MAX_COMPRESS_WINDOW_SIZE(log_size)     ((PAGE_SIZE) << (log_size))
 
+enum flush_policy {
+       FLUSH_POLICY_CP_NO_FUA,
+       FLUSH_POLICY_MAX,
+};
+
 struct f2fs_sb_info {
        struct super_block *sb;                 /* pointer to VFS super block */
        struct proc_dir_entry *s_proc;          /* proc entry */
@@ -1845,6 +1850,8 @@ struct f2fs_sb_info {
        /* carve out reserved_blocks from total blocks */
        bool carve_out;
 
+       unsigned int flush_policy;              /* flush policy */
+
 #ifdef CONFIG_F2FS_FS_COMPRESSION
        struct kmem_cache *page_array_slab;     /* page array entry */
        unsigned int page_array_slab_size;      /* default page array slab size */
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index f736052dea50..b69015f1dc67 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -852,6 +852,13 @@ static ssize_t __sbi_store(struct f2fs_attr *a,
                return count;
        }
 
+       if (!strcmp(a->attr.name, "flush_policy")) {
+               if (t >= BIT(FLUSH_POLICY_MAX))
+                       return -EINVAL;
+               *ui = (unsigned int)t;
+               return count;
+       }
+
        if (!strcmp(a->attr.name, "gc_boost_gc_multiple")) {
                if (t < 1 || t > SEGS_PER_SEC(sbi))
                        return -EINVAL;
@@ -1175,6 +1182,7 @@ F2FS_SBI_GENERAL_RW_ATTR(blkzone_alloc_policy);
 #endif
 F2FS_SBI_GENERAL_RW_ATTR(carve_out);
 F2FS_SBI_GENERAL_RW_ATTR(reserved_pin_section);
+F2FS_SBI_GENERAL_RW_ATTR(flush_policy);
 
 /* STAT_INFO ATTR */
 #ifdef CONFIG_F2FS_STAT_FS
@@ -1371,6 +1379,7 @@ static struct attribute *f2fs_attrs[] = {
        ATTR_LIST(max_read_extent_count),
        ATTR_LIST(carve_out),
        ATTR_LIST(reserved_pin_section),
+       ATTR_LIST(flush_policy),
        NULL,
 };
 ATTRIBUTE_GROUPS(f2fs);
-- 
2.49.0



_______________________________________________
Linux-f2fs-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

Reply via email to