Thanks Jens. On top of the patch killing the dead code, I wrote another one to detect idle time using the internal accounting logic, like below. IMHO, it'd be better to decouple f2fs from other layers, if possible.
Thanks, >From 85daf5190671b3d98ef779bdea77b4a046658708 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim <[email protected]> Date: Tue, 16 Oct 2018 10:20:53 -0700 Subject: [PATCH] f2fs: account read IOs and use IO counts for is_idle This patch adds issued read IO counts which is under block layer. Signed-off-by: Jaegeuk Kim <[email protected]> --- fs/f2fs/data.c | 24 ++++++++++++++++++++++-- fs/f2fs/debug.c | 7 ++++++- fs/f2fs/f2fs.h | 22 ++++++++++++++++------ 3 files changed, 44 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8952f2d610a6..5fdc8d751f19 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -52,6 +52,23 @@ static bool __is_cp_guaranteed(struct page *page) return false; } +static enum count_type __read_io_type(struct page *page) +{ + struct address_space *mapping = page->mapping; + + if (mapping) { + struct inode *inode = mapping->host; + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + if (inode->i_ino == F2FS_META_INO(sbi)) + return F2FS_RD_META; + + if (inode->i_ino == F2FS_NODE_INO(sbi)) + return F2FS_RD_NODE; + } + return F2FS_RD_DATA; +} + /* postprocessing steps for read bios */ enum bio_post_read_step { STEP_INITIAL = 0, @@ -82,6 +99,7 @@ static void __read_end_io(struct bio *bio) } else { SetPageUptodate(page); } + dec_page_count(F2FS_P_SB(page), __read_io_type(page)); unlock_page(page); } if (bio->bi_private) @@ -464,8 +482,8 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) __submit_bio(fio->sbi, bio, fio->type); - if (!is_read_io(fio->op)) - inc_page_count(fio->sbi, WB_DATA_TYPE(fio->page)); + inc_page_count(fio->sbi, is_read_io(fio->op) ? 
+ __read_io_type(page): WB_DATA_TYPE(fio->page)); return 0; } @@ -595,6 +613,7 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page, } ClearPageError(page); __submit_bio(F2FS_I_SB(inode), bio, DATA); + inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA); return 0; } @@ -1593,6 +1612,7 @@ static int f2fs_mpage_readpages(struct address_space *mapping, if (bio_add_page(bio, page, blocksize, 0) < blocksize) goto submit_and_realloc; + inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA); ClearPageError(page); last_block_in_bio = block_nr; goto next_page; diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 026e10f30889..139b4d5c83d5 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -55,6 +55,9 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->max_vw_cnt = atomic_read(&sbi->max_vw_cnt); si->nr_wb_cp_data = get_pages(sbi, F2FS_WB_CP_DATA); si->nr_wb_data = get_pages(sbi, F2FS_WB_DATA); + si->nr_rd_data = get_pages(sbi, F2FS_RD_DATA); + si->nr_rd_node = get_pages(sbi, F2FS_RD_NODE); + si->nr_rd_meta = get_pages(sbi, F2FS_RD_META); if (SM_I(sbi) && SM_I(sbi)->fcc_info) { si->nr_flushed = atomic_read(&SM_I(sbi)->fcc_info->issued_flush); @@ -371,7 +374,9 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n", si->ext_tree, si->zombie_tree, si->ext_node); seq_puts(s, "\nBalancing F2FS Async:\n"); - seq_printf(s, " - IO (CP: %4d, Data: %4d, Flush: (%4d %4d %4d), " + seq_printf(s, " - IO_R (Data: %4d, Node: %4d, Meta: %4d\n", + si->nr_rd_data, si->nr_rd_node, si->nr_rd_meta); + seq_printf(s, " - IO_W (CP: %4d, Data: %4d, Flush: (%4d %4d %4d), " "Discard: (%4d %4d)) cmd: %4d undiscard:%4u\n", si->nr_wb_cp_data, si->nr_wb_data, si->nr_flushing, si->nr_flushed, diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 156e6cd2c812..5c80eca194b5 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -950,6 +950,9 @@ enum count_type { F2FS_DIRTY_IMETA, F2FS_WB_CP_DATA, F2FS_WB_DATA, + F2FS_RD_DATA, + 
F2FS_RD_NODE, + F2FS_RD_META, NR_COUNT_TYPE, }; @@ -1392,11 +1395,6 @@ static inline unsigned int f2fs_time_to_wait(struct f2fs_sb_info *sbi, return wait_ms; } -static inline bool is_idle(struct f2fs_sb_info *sbi, int type) -{ - return f2fs_time_over(sbi, type); -} - /* * Inline functions */ @@ -1787,7 +1785,9 @@ static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type) atomic_inc(&sbi->nr_pages[count_type]); if (count_type == F2FS_DIRTY_DATA || count_type == F2FS_INMEM_PAGES || - count_type == F2FS_WB_CP_DATA || count_type == F2FS_WB_DATA) + count_type == F2FS_WB_CP_DATA || count_type == F2FS_WB_DATA || + count_type == F2FS_RD_DATA || count_type == F2FS_RD_NODE || + count_type == F2FS_RD_META) return; set_sbi_flag(sbi, SBI_IS_DIRTY); @@ -2124,6 +2124,15 @@ static inline struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi, return bio_alloc(GFP_KERNEL, npages); } +static inline bool is_idle(struct f2fs_sb_info *sbi, int type) +{ + if (get_pages(sbi, F2FS_RD_DATA) || get_pages(sbi, F2FS_RD_NODE) || + get_pages(sbi, F2FS_RD_META) || get_pages(sbi, F2FS_WB_DATA) || + get_pages(sbi, F2FS_WB_CP_DATA)) + return false; + return f2fs_time_over(sbi, type); +} + static inline void f2fs_radix_tree_insert(struct radix_tree_root *root, unsigned long index, void *item) { @@ -3116,6 +3125,7 @@ struct f2fs_stat_info { int free_nids, avail_nids, alloc_nids; int total_count, utilization; int bg_gc, nr_wb_cp_data, nr_wb_data; + int nr_rd_data, nr_rd_node, nr_rd_meta; unsigned int io_skip_bggc, other_skip_bggc; int nr_flushing, nr_flushed, flush_list_empty; int nr_discarding, nr_discarded; -- 2.19.0.605.g01d371f741-goog On 10/16, Jens Axboe wrote: > On 10/16/18 10:06 AM, Chao Yu wrote: > > Hi Jens, > > > > On 2018-10-16 22:34, Jens Axboe wrote: > >> This doesn't work on stacked devices, and it doesn't work on > >> blk-mq devices. The request_list is only used on legacy, which > >> we don't have much of anymore, and soon won't have any of. > >> > >> Kill the check. 
> > > > In order to avoid conflicting with user IO, GC and Discard thread try to be > > aware of IO status of device by is_idle(), if previous implementation > > doesn't > > work, do we have any other existing method to get such status? Or do you > > have > > any suggestion on this? > > This particular patch just kills code that won't yield any useful > information on stacked devices or blk-mq. As those are the only > ones that will exist shortly, it's dead. > > For blk-mq, you could do a busy tag iteration. Something like this. > That should be done separately though, in essence the existing code > is dead/useless. > > > diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c > index 4254e74c1446..823f04209e8c 100644 > --- a/block/blk-mq-tag.c > +++ b/block/blk-mq-tag.c > @@ -403,6 +403,7 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, > busy_iter_fn *fn, > } > blk_queue_exit(q); > } > +EXPORT_SYMBOL_GPL(blk_mq_queue_tag_busy_iter); > > static int bt_alloc(struct sbitmap_queue *bt, unsigned int depth, > bool round_robin, int node) > diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h > index 61deab0b5a5a..5af7ff94b400 100644 > --- a/block/blk-mq-tag.h > +++ b/block/blk-mq-tag.h > @@ -33,8 +33,6 @@ extern int blk_mq_tag_update_depth(struct blk_mq_hw_ctx > *hctx, > struct blk_mq_tags **tags, > unsigned int depth, bool can_grow); > extern void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool); > -void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, > - void *priv); > > static inline struct sbq_wait_state *bt_wait_ptr(struct sbitmap_queue *bt, > struct blk_mq_hw_ctx *hctx) > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h > index 58778992eca4..c3a2a7fe3f88 100644 > --- a/fs/f2fs/f2fs.h > +++ b/fs/f2fs/f2fs.h > @@ -1347,18 +1347,6 @@ static inline void f2fs_update_time(struct > f2fs_sb_info *sbi, int type) > sbi->last_time[type] = jiffies; > } > > -static inline bool f2fs_time_over(struct f2fs_sb_info *sbi, int type) > -{ > - unsigned 
long interval = sbi->interval_time[type] * HZ; > - > - return time_after(jiffies, sbi->last_time[type] + interval); > -} > - > -static inline bool is_idle(struct f2fs_sb_info *sbi) > -{ > - return f2fs_time_over(sbi, REQ_TIME); > -} > - > /* > * Inline functions > */ > @@ -2948,6 +2936,7 @@ void f2fs_destroy_segment_manager_caches(void); > int f2fs_rw_hint_to_seg_type(enum rw_hint hint); > enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi, > enum page_type type, enum temp_type temp); > +bool f2fs_is_idle(struct f2fs_sb_info *sbi); > > /* > * checkpoint.c > diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c > index 5c8d00422237..fb4152527cf1 100644 > --- a/fs/f2fs/gc.c > +++ b/fs/f2fs/gc.c > @@ -83,7 +83,7 @@ static int gc_thread_func(void *data) > if (!mutex_trylock(&sbi->gc_mutex)) > goto next; > > - if (!is_idle(sbi)) { > + if (!f2fs_is_idle(sbi)) { > increase_sleep_time(gc_th, &wait_ms); > mutex_unlock(&sbi->gc_mutex); > goto next; > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c > index 30779aaa9dba..a02c5ecfb2e4 100644 > --- a/fs/f2fs/segment.c > +++ b/fs/f2fs/segment.c > @@ -11,7 +11,7 @@ > #include <linux/fs.h> > #include <linux/f2fs_fs.h> > #include <linux/bio.h> > -#include <linux/blkdev.h> > +#include <linux/blk-mq.h> > #include <linux/prefetch.h> > #include <linux/kthread.h> > #include <linux/swap.h> > @@ -33,6 +33,37 @@ static struct kmem_cache *discard_cmd_slab; > static struct kmem_cache *sit_entry_set_slab; > static struct kmem_cache *inmem_entry_slab; > > +static bool f2fs_time_over(struct f2fs_sb_info *sbi, int type) > +{ > + unsigned long interval = sbi->interval_time[type] * HZ; > + > + return time_after(jiffies, sbi->last_time[type] + interval); > +} > + > +static void is_idle_fn(struct blk_mq_hw_ctx *hctx, struct request *req, > + void *data, bool reserved) > +{ > + unsigned int *cnt = data; > + > + (*cnt)++; > +} > + > +bool f2fs_is_idle(struct f2fs_sb_info *sbi) > +{ > + struct block_device *bdev = sbi->sb->s_bdev; > + struct 
request_queue *q = bdev_get_queue(bdev); > + > + if (q->mq_ops) { > + unsigned int cnt = 0; > + > + blk_mq_queue_tag_busy_iter(q, is_idle_fn, &cnt); > + if (cnt) > + return false; > + } > + > + return f2fs_time_over(sbi, REQ_TIME); > +} > + > static unsigned long __reverse_ulong(unsigned char *str) > { > unsigned long tmp = 0; > @@ -511,7 +542,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) > else > f2fs_build_free_nids(sbi, false, false); > > - if (!is_idle(sbi) && > + if (!f2fs_is_idle(sbi) && > (!excess_dirty_nats(sbi) && !excess_dirty_nodes(sbi))) > return; > > @@ -1311,7 +1342,7 @@ static unsigned int __issue_discard_cmd_orderly(struct > f2fs_sb_info *sbi, > if (dc->state != D_PREP) > goto next; > > - if (dpolicy->io_aware && !is_idle(sbi)) { > + if (dpolicy->io_aware && !f2fs_is_idle(sbi)) { > io_interrupted = true; > break; > } > @@ -1371,7 +1402,7 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, > f2fs_bug_on(sbi, dc->state != D_PREP); > > if (dpolicy->io_aware && i < dpolicy->io_aware_gran && > - !is_idle(sbi)) { > + > !f2fs_is_idle(sbi)) { > io_interrupted = true; > break; > } > diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h > index 2286dc12c6bc..095b086fb20e 100644 > --- a/include/linux/blk-mq.h > +++ b/include/linux/blk-mq.h > @@ -281,6 +281,8 @@ bool blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool > async); > void blk_mq_run_hw_queues(struct request_queue *q, bool async); > void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, > busy_tag_iter_fn *fn, void *priv); > +void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, > + void *priv); > void blk_mq_freeze_queue(struct request_queue *q); > void blk_mq_unfreeze_queue(struct request_queue *q); > void blk_freeze_queue_start(struct request_queue *q); > > -- > Jens Axboe _______________________________________________ Linux-f2fs-devel mailing list [email protected] https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
