Hi Chao,

This is really hard to review, and quite risky to apply on short notice. Do we have a strong reason to make this change? The original design goal was to minimize allocation latency, which is nearly zero today. Of course, I agree there would be some trade-off, but we don't have a critical issue with the current code, FWIW. Can we expect any speed gain from this? How much?
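For context, my concern is the allocation fast path: today alloc_nid() pops the first entry off nm_i->free_nid_list, which is O(1), while with this patch it has to walk the per-NAT-block bitmaps instead. Here is a toy userspace model of the new lookup, just to illustrate the cost I'm asking about -- made-up sizes and plain C bit ops instead of the kernel's find_next_bit_le() helpers, a sketch for discussion, not the patch's code:

	#include <stdio.h>

	#define NAT_BLOCKS		1024	/* made-up; depends on fs geometry */
	#define ENTRIES_PER_BLOCK	455	/* roughly NAT_ENTRY_PER_BLOCK */

	static unsigned char free_bitmap[NAT_BLOCKS][(ENTRIES_PER_BLOCK + 7) / 8];
	static unsigned short free_count[NAT_BLOCKS];	/* like nm_i->free_nid_count[] */

	static void mark_free(unsigned int nid)
	{
		unsigned int blk = nid / ENTRIES_PER_BLOCK;
		unsigned int bit = nid % ENTRIES_PER_BLOCK;

		free_bitmap[blk][bit / 8] |= 1u << (bit % 8);
		free_count[blk]++;
	}

	/* O(blocks + bits) scan, vs. the O(1) list_first_entry() it replaces */
	static int alloc_nid_bitmap(unsigned int *nid)
	{
		unsigned int blk, bit;

		for (blk = 0; blk < NAT_BLOCKS; blk++) {
			if (!free_count[blk])		/* skip exhausted blocks */
				continue;
			for (bit = 0; bit < ENTRIES_PER_BLOCK; bit++) {
				if (!(free_bitmap[blk][bit / 8] & (1u << (bit % 8))))
					continue;
				free_bitmap[blk][bit / 8] &= ~(1u << (bit % 8));
				free_count[blk]--;
				*nid = blk * ENTRIES_PER_BLOCK + bit;
				return 0;
			}
		}
		return -1;	/* nothing cached as free */
	}

	int main(void)
	{
		unsigned int nid;

		mark_free(100000);
		if (!alloc_nid_bitmap(&nid))
			printf("allocated nid %u\n", nid);	/* prints 100000 */
		return 0;
	}

The free_count[] check keeps the scan short in practice, but it is still not the O(1) list pop we have now, which is why I'm asking for numbers.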
Thanks,

On 11/30, Chao Yu wrote:
> Previously, we use free nid list to manage free nid entry, so during nid
> allocation, we can just pick up one entry from list header, which has
> quite low overhead.
>
> But sadly, during initialization of free nid list, we should do lookup
> combining with lots of different inner caches, including NAT page cache,
> nat entry cache, curseg journal cache and free nid bitmap, so flow became
> quite complicated.
>
> In this patch, we choose to obsolete old free nid management approach,
> instead, we use existing free nid bitmap which has the same functionality
> to manage free nid, in order to make free nid management codes more easy
> to maintain.
>
> Signed-off-by: Chao Yu <yuch...@huawei.com>
> ---
> v4: clean up codes.
>  fs/f2fs/checkpoint.c |   1 -
>  fs/f2fs/debug.c      |   7 +-
>  fs/f2fs/f2fs.h       |   9 +-
>  fs/f2fs/inode.c      |   2 +
>  fs/f2fs/node.c       | 487 +++++++++++++++++----------------------------------
>  fs/f2fs/node.h       |  22 ---
>  fs/f2fs/segment.c    |   5 -
>  fs/f2fs/shrinker.c   |  14 --
>  8 files changed, 164 insertions(+), 383 deletions(-)
>
> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> index d1f160ae4959..885525a0d981 100644
> --- a/fs/f2fs/checkpoint.c
> +++ b/fs/f2fs/checkpoint.c
> @@ -1024,7 +1024,6 @@ static void __prepare_cp_block(struct f2fs_sb_info *sbi)
>  	struct f2fs_nm_info *nm_i = NM_I(sbi);
>  	nid_t last_nid = nm_i->next_scan_nid;
>
> -	next_free_nid(sbi, &last_nid);
>  	ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi));
>  	ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi));
>  	ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi));
> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
> index a66107b5cfff..413e031b10c4 100644
> --- a/fs/f2fs/debug.c
> +++ b/fs/f2fs/debug.c
> @@ -100,9 +100,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
>  	si->dirty_nats = NM_I(sbi)->dirty_nat_cnt;
>  	si->sits = MAIN_SEGS(sbi);
>  	si->dirty_sits = SIT_I(sbi)->dirty_sentries;
> -	si->free_nids = NM_I(sbi)->nid_cnt[FREE_NID];
> +	si->free_nids = NM_I(sbi)->available_free_nids;
>  	si->avail_nids = NM_I(sbi)->available_nids;
> -	si->alloc_nids = NM_I(sbi)->nid_cnt[PREALLOC_NID];
>  	si->bg_gc = sbi->bg_gc;
>  	si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg)
>  		* 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg)
> @@ -233,10 +232,6 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
>  			atomic_read(&SM_I(sbi)->dcc_info->discard_cmd_cnt);
>  	}
>
> -	/* free nids */
> -	si->cache_mem += (NM_I(sbi)->nid_cnt[FREE_NID] +
> -				NM_I(sbi)->nid_cnt[PREALLOC_NID]) *
> -				sizeof(struct free_nid);
>  	si->cache_mem += NM_I(sbi)->nat_cnt * sizeof(struct nat_entry);
>  	si->cache_mem += NM_I(sbi)->dirty_nat_cnt *
>  					sizeof(struct nat_entry_set);
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index d92eba66263c..f08e0feb38c1 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -723,14 +723,13 @@ struct f2fs_nm_info {
>  	unsigned int nat_blocks;	/* # of nat blocks */
>
>  	/* free node ids management */
> -	struct radix_tree_root free_nid_root;/* root of the free_nid cache */
> -	struct list_head free_nid_list;	/* list for free nids excluding preallocated nids */
> -	unsigned int nid_cnt[MAX_NID_STATE];	/* the number of free node id */
> -	spinlock_t nid_list_lock;	/* protect nid lists ops */
> +	spinlock_t free_nid_lock;	/* protect nid lists ops */
>  	struct mutex build_lock;	/* lock for build free nids */
>  	unsigned char (*free_nid_bitmap)[NAT_ENTRY_BITMAP_SIZE];
> +	unsigned char (*prealloc_nid_bitmap)[NAT_ENTRY_BITMAP_SIZE];
>  	unsigned char *nat_block_bitmap;
>  	unsigned short *free_nid_count;	/* free nid count of NAT block */
> +	unsigned int available_free_nids;	/* available free nid count in bitmaps */
>
>  	/* for checkpoint */
>  	char *nat_bitmap;		/* NAT bitmap pointer */
> @@ -2622,11 +2621,9 @@ int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
>  			struct writeback_control *wbc, bool atomic);
>  int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc,
>  			bool do_balance, enum iostat_type io_type);
> -void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount);
>  bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid);
>  void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid);
>  void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid);
> -int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink);
>  void recover_inline_xattr(struct inode *inode, struct page *page);
>  int recover_xattr_data(struct inode *inode, struct page *page);
>  int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page);
> diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
> index 9684d53563f1..82f543e5c35b 100644
> --- a/fs/f2fs/inode.c
> +++ b/fs/f2fs/inode.c
> @@ -559,7 +559,9 @@ void f2fs_evict_inode(struct inode *inode)
>  		add_ino_entry(sbi, inode->i_ino, UPDATE_INO);
>  	}
>  	if (is_inode_flag_set(inode, FI_FREE_NID)) {
> +		f2fs_lock_op(sbi);
>  		alloc_nid_failed(sbi, inode->i_ino);
> +		f2fs_unlock_op(sbi);
>  		clear_inode_flag(inode, FI_FREE_NID);
>  	} else {
>  		f2fs_bug_on(sbi, err &&
> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> index 80c37a094631..d2c9dcb0cbf8 100644
> --- a/fs/f2fs/node.c
> +++ b/fs/f2fs/node.c
> @@ -23,10 +23,7 @@
>  #include "trace.h"
>  #include <trace/events/f2fs.h>
>
> -#define on_build_free_nids(nmi) mutex_is_locked(&(nm_i)->build_lock)
> -
>  static struct kmem_cache *nat_entry_slab;
> -static struct kmem_cache *free_nid_slab;
>  static struct kmem_cache *nat_entry_set_slab;
>
>  bool available_free_memory(struct f2fs_sb_info *sbi, int type)
> @@ -43,13 +40,9 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
>  	avail_ram = val.totalram - val.totalhigh;
>
>  	/*
> -	 * give 25%, 25%, 50%, 50%, 50% memory for each components respectively
> +	 * give 25%, 50%, 50%, 50% memory for each components respectively
>  	 */
> -	if (type == FREE_NIDS) {
> -		mem_size = (nm_i->nid_cnt[FREE_NID] *
> -				sizeof(struct free_nid)) >> PAGE_SHIFT;
> -		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
> -	} else if (type == NAT_ENTRIES) {
> +	if (type == NAT_ENTRIES) {
>  		mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >>
>  							PAGE_SHIFT;
>  		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
> @@ -1774,259 +1767,171 @@ const struct address_space_operations f2fs_node_aops = {
>  #endif
>  };
>
> -static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i,
> -						nid_t n)
> -{
> -	return radix_tree_lookup(&nm_i->free_nid_root, n);
> -}
> -
> -static int __insert_free_nid(struct f2fs_sb_info *sbi,
> -			struct free_nid *i, enum nid_state state)
> -{
> -	struct f2fs_nm_info *nm_i = NM_I(sbi);
> -
> -	int err = radix_tree_insert(&nm_i->free_nid_root, i->nid, i);
> -	if (err)
> -		return err;
> -
> -	f2fs_bug_on(sbi, state != i->state);
> -	nm_i->nid_cnt[state]++;
> -	if (state == FREE_NID)
> -		list_add_tail(&i->list, &nm_i->free_nid_list);
> -	return 0;
> -}
> -
> -static void __remove_free_nid(struct f2fs_sb_info *sbi,
> -			struct free_nid *i, enum nid_state state)
> -{
> -	struct f2fs_nm_info *nm_i = NM_I(sbi);
> -
> -	f2fs_bug_on(sbi, state != i->state);
> -	nm_i->nid_cnt[state]--;
> -	if (state == FREE_NID)
> -		list_del(&i->list);
> -	radix_tree_delete(&nm_i->free_nid_root, i->nid);
> -}
> -
> -static void __move_free_nid(struct f2fs_sb_info *sbi, struct free_nid *i,
> -			enum nid_state org_state, enum nid_state dst_state)
> -{
> -	struct f2fs_nm_info *nm_i = NM_I(sbi);
> -
> -	f2fs_bug_on(sbi, org_state != i->state);
> -	i->state = dst_state;
> -	nm_i->nid_cnt[org_state]--;
> -	nm_i->nid_cnt[dst_state]++;
> -
> -	switch (dst_state) {
> -	case PREALLOC_NID:
> -		list_del(&i->list);
> -		break;
> -	case FREE_NID:
> -		list_add_tail(&i->list, &nm_i->free_nid_list);
> -		break;
> -	default:
> -		BUG_ON(1);
> -	}
> -}
> -
> -static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid,
> -							bool set, bool build)
> +static bool update_free_nid_bitmap(struct f2fs_sb_info *sbi,
> +						nid_t nid, bool set)
>  {
>  	struct f2fs_nm_info *nm_i = NM_I(sbi);
>  	unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid);
>  	unsigned int nid_ofs = nid - START_NID(nid);
>
>  	if (!test_bit_le(nat_ofs, nm_i->nat_block_bitmap))
> -		return;
> +		return false;
>
>  	if (set) {
>  		if (test_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]))
> -			return;
> +			return false;
>  		__set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
>  		nm_i->free_nid_count[nat_ofs]++;
> +		nm_i->available_free_nids++;
>  	} else {
>  		if (!test_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]))
> -			return;
> +			return false;
>  		__clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
> -		if (!build)
> -			nm_i->free_nid_count[nat_ofs]--;
> +		nm_i->free_nid_count[nat_ofs]--;
> +		nm_i->available_free_nids--;
> +	}
> +	return true;
> +}
> +
> +static void update_prealloc_nid_bitmap(struct f2fs_sb_info *sbi,
> +						nid_t nid, bool set)
> +{
> +	struct f2fs_nm_info *nm_i = NM_I(sbi);
> +	unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid);
> +	unsigned int nid_ofs = nid - START_NID(nid);
> +
> +	if (set) {
> +		f2fs_bug_on(sbi, test_bit_le(nid_ofs,
> +				nm_i->prealloc_nid_bitmap[nat_ofs]));
> +		__set_bit_le(nid_ofs, nm_i->prealloc_nid_bitmap[nat_ofs]);
> +	} else {
> +		f2fs_bug_on(sbi, !test_bit_le(nid_ofs,
> +				nm_i->prealloc_nid_bitmap[nat_ofs]));
> +		__clear_bit_le(nid_ofs, nm_i->prealloc_nid_bitmap[nat_ofs]);
>  	}
>  }
>
>  /* return if the nid is recognized as free */
> -static bool add_free_nid(struct f2fs_sb_info *sbi,
> -			nid_t nid, bool build, bool update)
> +static bool add_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
>  {
>  	struct f2fs_nm_info *nm_i = NM_I(sbi);
> -	struct free_nid *i, *e;
>  	struct nat_entry *ne;
> -	int err = -EINVAL;
> -	bool ret = false;
> +	unsigned int nat_ofs, nid_ofs;
>
>  	/* 0 nid should not be used */
>  	if (unlikely(nid == 0))
>  		return false;
>
> -	i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS);
> -	i->nid = nid;
> -	i->state = FREE_NID;
> -
> -	radix_tree_preload(GFP_NOFS | __GFP_NOFAIL);
> -
> -	spin_lock(&nm_i->nid_list_lock);
> +	ne = __lookup_nat_cache(nm_i, nid);
> +	if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) ||
> +			nat_get_blkaddr(ne) != NULL_ADDR))
> +		return false;
>
> -	if (build) {
> -		/*
> -		 *	Thread A		Thread B
> -		 *  - f2fs_create
> -		 *   - f2fs_new_inode
> -		 *    - alloc_nid
> -		 *     - __insert_nid_to_list(PREALLOC_NID)
> -		 *				- f2fs_balance_fs_bg
> -		 *				 - build_free_nids
> -		 *				  - __build_free_nids
> -		 *				   - scan_nat_page
> -		 *				    - add_free_nid
> -		 *				     - __lookup_nat_cache
> -		 *  - f2fs_add_link
> -		 *   - init_inode_metadata
> -		 *    - new_inode_page
> -		 *     - new_node_page
> -		 *      - set_node_addr
> -		 *  - alloc_nid_done
> -		 *   - __remove_nid_from_list(PREALLOC_NID)
> -		 *				- __insert_nid_to_list(FREE_NID)
> -		 */
> -		ne = __lookup_nat_cache(nm_i, nid);
> -		if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) ||
> -				nat_get_blkaddr(ne) != NULL_ADDR))
> -			goto err_out;
> -
> -		e = __lookup_free_nid_list(nm_i, nid);
> -		if (e) {
> -			if (e->state == FREE_NID)
> -				ret = true;
> -			goto err_out;
> -		}
> -	}
> -	ret = true;
> -	err = __insert_free_nid(sbi, i, FREE_NID);
> -err_out:
> -	if (update) {
> -		update_free_nid_bitmap(sbi, nid, ret, build);
> -		if (!build)
> -			nm_i->available_nids++;
> -	}
> -	spin_unlock(&nm_i->nid_list_lock);
> -	radix_tree_preload_end();
> +	nat_ofs = NAT_BLOCK_OFFSET(nid);
> +	nid_ofs = nid - START_NID(nid);
> +	if (test_bit_le(nid_ofs, nm_i->prealloc_nid_bitmap[nat_ofs]))
> +		return false;
>
> -	if (err)
> -		kmem_cache_free(free_nid_slab, i);
> -	return ret;
> +	update_free_nid_bitmap(sbi, nid, true);
> +	return true;
>  }
>
> -static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
> +static void scan_curseg_cache(struct f2fs_sb_info *sbi, nid_t start_nid,
> +							nid_t end_nid)
>  {
> -	struct f2fs_nm_info *nm_i = NM_I(sbi);
> -	struct free_nid *i;
> -	bool need_free = false;
> +	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
> +	struct f2fs_journal *journal = curseg->journal;
> +	int i;
>
> -	spin_lock(&nm_i->nid_list_lock);
> -	i = __lookup_free_nid_list(nm_i, nid);
> -	if (i && i->state == FREE_NID) {
> -		__remove_free_nid(sbi, i, FREE_NID);
> -		need_free = true;
> -	}
> -	spin_unlock(&nm_i->nid_list_lock);
> +	down_read(&curseg->journal_rwsem);
> +	for (i = 0; i < nats_in_cursum(journal); i++) {
> +		block_t addr;
> +		nid_t nid = le32_to_cpu(nid_in_journal(journal, i));
> +
> +		if (nid < start_nid || nid >= end_nid)
> +			continue;
> +
> +		addr = le32_to_cpu(nat_in_journal(journal, i).block_addr);
> +
> +		f2fs_bug_on(sbi, addr == NEW_ADDR);
>
> -	if (need_free)
> -		kmem_cache_free(free_nid_slab, i);
> +		spin_lock(&NM_I(sbi)->free_nid_lock);
> +		if (addr == NULL_ADDR)
> +			add_free_nid(sbi, nid);
> +		else
> +			update_free_nid_bitmap(sbi, nid, false);
> +		spin_unlock(&NM_I(sbi)->free_nid_lock);
> +	}
> +	up_read(&curseg->journal_rwsem);
>  }
>
> -static void scan_nat_page(struct f2fs_sb_info *sbi,
> -			struct page *nat_page, nid_t start_nid)
> +static void scan_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
>  {
>  	struct f2fs_nm_info *nm_i = NM_I(sbi);
> -	struct f2fs_nat_block *nat_blk = page_address(nat_page);
> +	struct page *page;
> +	struct f2fs_nat_block *nat_blk;
>  	block_t blk_addr;
> -	unsigned int nat_ofs = NAT_BLOCK_OFFSET(start_nid);
> +	unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid);
> +	nid_t start_nid = nid;
>  	int i;
>
> +	if (test_bit_le(nat_ofs, nm_i->nat_block_bitmap))
> +		return;
> +
> +	page = get_current_nat_page(sbi, nid);
> +	nat_blk = page_address(page);
> +
>  	__set_bit_le(nat_ofs, nm_i->nat_block_bitmap);
>
> -	i = start_nid % NAT_ENTRY_PER_BLOCK;
> +	i = nid % NAT_ENTRY_PER_BLOCK;
>
> -	for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) {
> -		if (unlikely(start_nid >= nm_i->max_nid))
> +	for (; i < NAT_ENTRY_PER_BLOCK; i++, nid++) {
> +		if (unlikely(nid >= nm_i->max_nid))
>  			break;
>
>  		blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
>  		f2fs_bug_on(sbi, blk_addr == NEW_ADDR);
>  		if (blk_addr == NULL_ADDR) {
> -			add_free_nid(sbi, start_nid, true, true);
> -		} else {
> -			spin_lock(&NM_I(sbi)->nid_list_lock);
> -			update_free_nid_bitmap(sbi, start_nid, false, true);
> -			spin_unlock(&NM_I(sbi)->nid_list_lock);
> +			spin_lock(&nm_i->free_nid_lock);
> +			add_free_nid(sbi, nid);
> +			spin_unlock(&nm_i->free_nid_lock);
>  		}
>  	}
> -}
> -
> -static void scan_curseg_cache(struct f2fs_sb_info *sbi)
> -{
> -	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
> -	struct f2fs_journal *journal = curseg->journal;
> -	int i;
>
> -	down_read(&curseg->journal_rwsem);
> -	for (i = 0; i < nats_in_cursum(journal); i++) {
> -		block_t addr;
> -		nid_t nid;
> +	f2fs_put_page(page, 1);
>
> -		addr = le32_to_cpu(nat_in_journal(journal, i).block_addr);
> -		nid = le32_to_cpu(nid_in_journal(journal, i));
> -		if (addr == NULL_ADDR)
> -			add_free_nid(sbi, nid, true, false);
> -		else
> -			remove_free_nid(sbi, nid);
> -	}
> -	up_read(&curseg->journal_rwsem);
> +	/* find free nids from current sum_pages */
> +	scan_curseg_cache(sbi, start_nid, start_nid + NAT_ENTRY_PER_BLOCK);
>  }
>
> -static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
> +static nid_t lookup_free_nid_bitmap(struct f2fs_sb_info *sbi)
>  {
>  	struct f2fs_nm_info *nm_i = NM_I(sbi);
> -	unsigned int i, idx;
> -	nid_t nid;
> -
> -	down_read(&nm_i->nat_tree_lock);
> +	unsigned int i;
> +	nid_t nid = 0;
>
>  	for (i = 0; i < nm_i->nat_blocks; i++) {
> +		unsigned int idx = 0;
> +
>  		if (!test_bit_le(i, nm_i->nat_block_bitmap))
>  			continue;
>  		if (!nm_i->free_nid_count[i])
>  			continue;
> -		for (idx = 0; idx < NAT_ENTRY_PER_BLOCK; idx++) {
> -			idx = find_next_bit_le(nm_i->free_nid_bitmap[i],
> -						NAT_ENTRY_PER_BLOCK, idx);
> -			if (idx >= NAT_ENTRY_PER_BLOCK)
> -				break;
>
> -			nid = i * NAT_ENTRY_PER_BLOCK + idx;
> -			add_free_nid(sbi, nid, true, false);
> +		idx = find_next_bit_le(nm_i->free_nid_bitmap[i],
> +					NAT_ENTRY_PER_BLOCK, idx);
> +		if (idx >= NAT_ENTRY_PER_BLOCK)
> +			continue;
>
> -			if (nm_i->nid_cnt[FREE_NID] >= MAX_FREE_NIDS)
> -				goto out;
> -		}
> +		nid = i * NAT_ENTRY_PER_BLOCK + idx;
> +		break;
>  	}
> -out:
> -	scan_curseg_cache(sbi);
>
> -	up_read(&nm_i->nat_tree_lock);
> +	f2fs_bug_on(sbi, !nid);
> +	return nid;
>  }
>
> -static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
> +static void __build_free_nids(struct f2fs_sb_info *sbi, bool mount)
>  {
>  	struct f2fs_nm_info *nm_i = NM_I(sbi);
>  	int i = 0;
> @@ -2036,59 +1941,36 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
>  		nid = 0;
>
>  	/* Enough entries */
> -	if (nm_i->nid_cnt[FREE_NID] >= NAT_ENTRY_PER_BLOCK)
> -		return;
> -
> -	if (!sync && !available_free_memory(sbi, FREE_NIDS))
> +	if (!mount && nm_i->available_free_nids >= NAT_ENTRY_PER_BLOCK)
>  		return;
>
> -	if (!mount) {
> -		/* try to find free nids in free_nid_bitmap */
> -		scan_free_nid_bits(sbi);
> -
> -		if (nm_i->nid_cnt[FREE_NID] >= NAT_ENTRY_PER_BLOCK)
> -			return;
> -	}
> -
>  	/* readahead nat pages to be scanned */
>  	ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
>  							META_NAT, true);
>
>  	down_read(&nm_i->nat_tree_lock);
>
> -	while (1) {
> -		if (!test_bit_le(NAT_BLOCK_OFFSET(nid),
> -						nm_i->nat_block_bitmap)) {
> -			struct page *page = get_current_nat_page(sbi, nid);
> -
> -			scan_nat_page(sbi, page, nid);
> -			f2fs_put_page(page, 1);
> -		}
> +	do {
> +		scan_nat_page(sbi, nid);
>
>  		nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK));
>  		if (unlikely(nid >= nm_i->max_nid))
>  			nid = 0;
> -
> -		if (++i >= FREE_NID_PAGES)
> -			break;
> -	}
> +	} while (++i < FREE_NID_PAGES);
>
>  	/* go to the next free nat pages to find free nids abundantly */
>  	nm_i->next_scan_nid = nid;
>
> -	/* find free nids from current sum_pages */
> -	scan_curseg_cache(sbi);
> -
>  	up_read(&nm_i->nat_tree_lock);
>
>  	ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
>  					nm_i->ra_nid_pages, META_NAT, false);
>  }
>
> -void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
> +void build_free_nids(struct f2fs_sb_info *sbi, bool mount)
>  {
>  	mutex_lock(&NM_I(sbi)->build_lock);
> -	__build_free_nids(sbi, sync, mount);
> +	__build_free_nids(sbi, mount);
>  	mutex_unlock(&NM_I(sbi)->build_lock);
>  }
>
> @@ -2100,7 +1982,6 @@ void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
>  bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
>  {
>  	struct f2fs_nm_info *nm_i = NM_I(sbi);
> -	struct free_nid *i = NULL;
>  retry:
>  #ifdef CONFIG_F2FS_FAULT_INJECTION
>  	if (time_to_inject(sbi, FAULT_ALLOC_NID)) {
> @@ -2108,32 +1989,34 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
>  		return false;
>  	}
>  #endif
> -	spin_lock(&nm_i->nid_list_lock);
> +
> +	spin_lock(&nm_i->free_nid_lock);
>
>  	if (unlikely(nm_i->available_nids == 0)) {
> -		spin_unlock(&nm_i->nid_list_lock);
> +		spin_unlock(&nm_i->free_nid_lock);
>  		return false;
>  	}
>
>  	/* We should not use stale free nids created by build_free_nids */
> -	if (nm_i->nid_cnt[FREE_NID] && !on_build_free_nids(nm_i)) {
> -		f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list));
> -		i = list_first_entry(&nm_i->free_nid_list,
> -					struct free_nid, list);
> -		*nid = i->nid;
> +	if (nm_i->available_free_nids && mutex_trylock(&nm_i->build_lock)) {
> +		bool updated;
> +
> +		*nid = lookup_free_nid_bitmap(sbi);
>
> -		__move_free_nid(sbi, i, FREE_NID, PREALLOC_NID);
> +		updated = update_free_nid_bitmap(sbi, *nid, false);
> +		f2fs_bug_on(sbi, !updated);
>  		nm_i->available_nids--;
>
> -		update_free_nid_bitmap(sbi, *nid, false, false);
> +		update_prealloc_nid_bitmap(sbi, *nid, true);
>
> -		spin_unlock(&nm_i->nid_list_lock);
> +		spin_unlock(&nm_i->free_nid_lock);
> +		mutex_unlock(&nm_i->build_lock);
>  		return true;
>  	}
> -	spin_unlock(&nm_i->nid_list_lock);
> +	spin_unlock(&nm_i->free_nid_lock);
>
>  	/* Let's scan nat pages and its caches to get free nids */
> -	build_free_nids(sbi, true, false);
> +	build_free_nids(sbi, false);
>  	goto retry;
>  }
>
> @@ -2143,15 +2026,10 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
>  void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
>  {
>  	struct f2fs_nm_info *nm_i = NM_I(sbi);
> -	struct free_nid *i;
> -
> -	spin_lock(&nm_i->nid_list_lock);
> -	i = __lookup_free_nid_list(nm_i, nid);
> -	f2fs_bug_on(sbi, !i);
> -	__remove_free_nid(sbi, i, PREALLOC_NID);
> -	spin_unlock(&nm_i->nid_list_lock);
>
> -	kmem_cache_free(free_nid_slab, i);
> +	spin_lock(&nm_i->free_nid_lock);
> +	update_prealloc_nid_bitmap(sbi, nid, false);
> +	spin_unlock(&nm_i->free_nid_lock);
>  }
>
>  /*
> @@ -2160,59 +2038,21 @@ void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
>  void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
>  {
>  	struct f2fs_nm_info *nm_i = NM_I(sbi);
> -	struct free_nid *i;
> -	bool need_free = false;
>
>  	if (!nid)
>  		return;
>
> -	spin_lock(&nm_i->nid_list_lock);
> -	i = __lookup_free_nid_list(nm_i, nid);
> -	f2fs_bug_on(sbi, !i);
> -
> -	if (!available_free_memory(sbi, FREE_NIDS)) {
> -		__remove_free_nid(sbi, i, PREALLOC_NID);
> -		need_free = true;
> -	} else {
> -		__move_free_nid(sbi, i, PREALLOC_NID, FREE_NID);
> -	}
> -
> -	nm_i->available_nids++;
> -
> -	update_free_nid_bitmap(sbi, nid, true, false);
> -
> -	spin_unlock(&nm_i->nid_list_lock);
> -
> -	if (need_free)
> -		kmem_cache_free(free_nid_slab, i);
> -}
> -
> -int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink)
> -{
> -	struct f2fs_nm_info *nm_i = NM_I(sbi);
> -	struct free_nid *i, *next;
> -	int nr = nr_shrink;
> -
> -	if (nm_i->nid_cnt[FREE_NID] <= MAX_FREE_NIDS)
> -		return 0;
> -
> -	if (!mutex_trylock(&nm_i->build_lock))
> -		return 0;
> +	mutex_lock(&nm_i->build_lock);
> +	down_read(&nm_i->nat_tree_lock);
> +	spin_lock(&nm_i->free_nid_lock);
>
> -	spin_lock(&nm_i->nid_list_lock);
> -	list_for_each_entry_safe(i, next, &nm_i->free_nid_list, list) {
> -		if (nr_shrink <= 0 ||
> -				nm_i->nid_cnt[FREE_NID] <= MAX_FREE_NIDS)
> -			break;
> +	update_prealloc_nid_bitmap(sbi, nid, false);
>
> -		__remove_free_nid(sbi, i, FREE_NID);
> -		kmem_cache_free(free_nid_slab, i);
> -		nr_shrink--;
> -	}
> -	spin_unlock(&nm_i->nid_list_lock);
> +	if (add_free_nid(sbi, nid))
> +		nm_i->available_nids++;
> +	spin_unlock(&nm_i->free_nid_lock);
> +	up_read(&nm_i->nat_tree_lock);
>  	mutex_unlock(&nm_i->build_lock);
> -
> -	return nr - nr_shrink;
>  }
>
>  void recover_inline_xattr(struct inode *inode, struct page *page)
> @@ -2303,7 +2143,10 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
>  	}
>
>  	/* Should not use this inode from free nid list */
> -	remove_free_nid(sbi, ino);
> +	spin_lock(&NM_I(sbi)->free_nid_lock);
> +	update_free_nid_bitmap(sbi, ino, false);
> +	NM_I(sbi)->available_nids--;
> +	spin_unlock(&NM_I(sbi)->free_nid_lock);
>
>  	if (!PageUptodate(ipage))
>  		SetPageUptodate(ipage);
> @@ -2408,9 +2251,9 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
>  		 */
>  		if (!get_nat_flag(ne, IS_DIRTY) &&
>  				le32_to_cpu(raw_ne.block_addr) == NULL_ADDR) {
> -			spin_lock(&nm_i->nid_list_lock);
> +			spin_lock(&nm_i->free_nid_lock);
>  			nm_i->available_nids--;
> -			spin_unlock(&nm_i->nid_list_lock);
> +			spin_unlock(&nm_i->free_nid_lock);
>  		}
>
>  		__set_nat_cache_dirty(nm_i, ne);
> @@ -2519,11 +2362,14 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
>  		nat_reset_flag(ne);
>  		__clear_nat_cache_dirty(NM_I(sbi), set, ne);
>  		if (nat_get_blkaddr(ne) == NULL_ADDR) {
> -			add_free_nid(sbi, nid, false, true);
> +			spin_lock(&NM_I(sbi)->free_nid_lock);
> +			add_free_nid(sbi, nid);
> +			NM_I(sbi)->available_nids++;
> +			spin_unlock(&NM_I(sbi)->free_nid_lock);
>  		} else {
> -			spin_lock(&NM_I(sbi)->nid_list_lock);
> -			update_free_nid_bitmap(sbi, nid, false, false);
> -			spin_unlock(&NM_I(sbi)->nid_list_lock);
> +			spin_lock(&NM_I(sbi)->free_nid_lock);
> +			update_free_nid_bitmap(sbi, nid, false);
> +			spin_unlock(&NM_I(sbi)->free_nid_lock);
>  		}
>  	}
>
> @@ -2647,10 +2493,10 @@ static inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi)
>  		nid = i * NAT_ENTRY_PER_BLOCK;
>  		last_nid = nid + NAT_ENTRY_PER_BLOCK;
>
> -		spin_lock(&NM_I(sbi)->nid_list_lock);
> +		spin_lock(&NM_I(sbi)->free_nid_lock);
>  		for (; nid < last_nid; nid++)
> -			update_free_nid_bitmap(sbi, nid, true, true);
> -		spin_unlock(&NM_I(sbi)->nid_list_lock);
> +			update_free_nid_bitmap(sbi, nid, true);
> +		spin_unlock(&NM_I(sbi)->free_nid_lock);
>  	}
>
>  	for (i = 0; i < nm_i->nat_blocks; i++) {
> @@ -2660,6 +2506,8 @@ static inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi)
>
>  		__set_bit_le(i, nm_i->nat_block_bitmap);
>  	}
> +
> +	scan_curseg_cache(sbi, 0, nm_i->max_nid);
>  }
>
>  static int init_node_manager(struct f2fs_sb_info *sbi)
> @@ -2680,21 +2528,18 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
>  	/* not used nids: 0, node, meta, (and root counted as valid node) */
>  	nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count -
>  				sbi->nquota_files - F2FS_RESERVED_NODE_NUM;
> -	nm_i->nid_cnt[FREE_NID] = 0;
> -	nm_i->nid_cnt[PREALLOC_NID] = 0;
> +	nm_i->available_free_nids = 0;
>  	nm_i->nat_cnt = 0;
>  	nm_i->ram_thresh = DEF_RAM_THRESHOLD;
>  	nm_i->ra_nid_pages = DEF_RA_NID_PAGES;
>  	nm_i->dirty_nats_ratio = DEF_DIRTY_NAT_RATIO_THRESHOLD;
>
> -	INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
> -	INIT_LIST_HEAD(&nm_i->free_nid_list);
>  	INIT_RADIX_TREE(&nm_i->nat_root, GFP_NOIO);
>  	INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_NOIO);
>  	INIT_LIST_HEAD(&nm_i->nat_entries);
>
>  	mutex_init(&nm_i->build_lock);
> -	spin_lock_init(&nm_i->nid_list_lock);
> +	spin_lock_init(&nm_i->free_nid_lock);
>  	init_rwsem(&nm_i->nat_tree_lock);
>
>  	nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
> @@ -2731,6 +2576,11 @@ static int init_free_nid_cache(struct f2fs_sb_info *sbi)
>  	if (!nm_i->free_nid_bitmap)
>  		return -ENOMEM;
>
> +	nm_i->prealloc_nid_bitmap = f2fs_kvzalloc(sbi, nm_i->nat_blocks *
> +					NAT_ENTRY_BITMAP_SIZE, GFP_KERNEL);
> +	if (!nm_i->prealloc_nid_bitmap)
> +		return -ENOMEM;
> +
>  	nm_i->nat_block_bitmap = f2fs_kvzalloc(sbi, nm_i->nat_blocks / 8,
>  								GFP_KERNEL);
>  	if (!nm_i->nat_block_bitmap)
> @@ -2763,14 +2613,13 @@ int build_node_manager(struct f2fs_sb_info *sbi)
>  	/* load free nid status from nat_bits table */
>  	load_free_nid_bitmap(sbi);
>
> -	build_free_nids(sbi, true, true);
> +	build_free_nids(sbi, true);
>  	return 0;
>  }
>
>  void destroy_node_manager(struct f2fs_sb_info *sbi)
>  {
>  	struct f2fs_nm_info *nm_i = NM_I(sbi);
> -	struct free_nid *i, *next_i;
>  	struct nat_entry *natvec[NATVEC_SIZE];
>  	struct nat_entry_set *setvec[SETVEC_SIZE];
>  	nid_t nid = 0;
> @@ -2779,19 +2628,6 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
>  	if (!nm_i)
>  		return;
>
> -	/* destroy free nid list */
> -	spin_lock(&nm_i->nid_list_lock);
> -	list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) {
> -		__remove_free_nid(sbi, i, FREE_NID);
> -		spin_unlock(&nm_i->nid_list_lock);
> -		kmem_cache_free(free_nid_slab, i);
> -		spin_lock(&nm_i->nid_list_lock);
> -	}
> -	f2fs_bug_on(sbi, nm_i->nid_cnt[FREE_NID]);
> -	f2fs_bug_on(sbi, nm_i->nid_cnt[PREALLOC_NID]);
> -	f2fs_bug_on(sbi, !list_empty(&nm_i->free_nid_list));
> -	spin_unlock(&nm_i->nid_list_lock);
> -
>  	/* destroy nat cache */
>  	down_write(&nm_i->nat_tree_lock);
>  	while ((found = __gang_lookup_nat_cache(nm_i,
> @@ -2822,6 +2658,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
>
>  	kvfree(nm_i->nat_block_bitmap);
>  	kvfree(nm_i->free_nid_bitmap);
> +	kvfree(nm_i->prealloc_nid_bitmap);
>  	kvfree(nm_i->free_nid_count);
>
>  	kfree(nm_i->nat_bitmap);
> @@ -2840,19 +2677,12 @@ int __init create_node_manager_caches(void)
>  	if (!nat_entry_slab)
>  		goto fail;
>
> -	free_nid_slab = f2fs_kmem_cache_create("free_nid",
> -					sizeof(struct free_nid));
> -	if (!free_nid_slab)
> -		goto destroy_nat_entry;
> -
>  	nat_entry_set_slab = f2fs_kmem_cache_create("nat_entry_set",
>  			sizeof(struct nat_entry_set));
>  	if (!nat_entry_set_slab)
> -		goto destroy_free_nid;
> +		goto destroy_nat_entry;
>  	return 0;
>
> -destroy_free_nid:
> -	kmem_cache_destroy(free_nid_slab);
> destroy_nat_entry:
>  	kmem_cache_destroy(nat_entry_slab);
> fail:
> @@ -2862,6 +2692,5 @@ int __init create_node_manager_caches(void)
>  void destroy_node_manager_caches(void)
>  {
>  	kmem_cache_destroy(nat_entry_set_slab);
> -	kmem_cache_destroy(free_nid_slab);
>  	kmem_cache_destroy(nat_entry_slab);
>  }
> diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
> index 0ee3e5ff49a3..d5cf8af70c90 100644
> --- a/fs/f2fs/node.h
> +++ b/fs/f2fs/node.h
> @@ -135,7 +135,6 @@ static inline bool excess_cached_nats(struct f2fs_sb_info *sbi)
>  }
>
>  enum mem_type {
> -	FREE_NIDS,	/* indicates the free nid list */
>  	NAT_ENTRIES,	/* indicates the cached nat entry */
>  	DIRTY_DENTS,	/* indicates dirty dentry pages */
>  	INO_ENTRIES,	/* indicates inode entries */
> @@ -151,27 +150,6 @@ struct nat_entry_set {
>  	unsigned int entry_cnt;		/* the # of nat entries in set */
>  };
>
> -struct free_nid {
> -	struct list_head list;	/* for free node id list */
> -	nid_t nid;		/* node id */
> -	int state;		/* in use or not: FREE_NID or PREALLOC_NID */
> -};
> -
> -static inline void next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid)
> -{
> -	struct f2fs_nm_info *nm_i = NM_I(sbi);
> -	struct free_nid *fnid;
> -
> -	spin_lock(&nm_i->nid_list_lock);
> -	if (nm_i->nid_cnt[FREE_NID] <= 0) {
> -		spin_unlock(&nm_i->nid_list_lock);
> -		return;
> -	}
> -	fnid = list_first_entry(&nm_i->free_nid_list, struct free_nid, list);
> -	*nid = fnid->nid;
> -	spin_unlock(&nm_i->nid_list_lock);
> -}
> -
>  /*
>   * inline functions
>   */
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 176a2b97e6d3..c9a0563907aa 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -482,11 +482,6 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
>  	if (!available_free_memory(sbi, NAT_ENTRIES))
>  		try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK);
>
> -	if (!available_free_memory(sbi, FREE_NIDS))
> -		try_to_free_nids(sbi, MAX_FREE_NIDS);
> -	else
> -		build_free_nids(sbi, false, false);
> -
>  	if (!is_idle(sbi) && !excess_dirty_nats(sbi))
>  		return;
>
> diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c
> index 0b5664a1a6cc..7123bcb3cb62 100644
> --- a/fs/f2fs/shrinker.c
> +++ b/fs/f2fs/shrinker.c
> @@ -26,13 +26,6 @@ static unsigned long __count_nat_entries(struct f2fs_sb_info *sbi)
>  	return count > 0 ? count : 0;
>  }
>
> -static unsigned long __count_free_nids(struct f2fs_sb_info *sbi)
> -{
> -	long count = NM_I(sbi)->nid_cnt[FREE_NID] - MAX_FREE_NIDS;
> -
> -	return count > 0 ? count : 0;
> -}
> -
>  static unsigned long __count_extent_cache(struct f2fs_sb_info *sbi)
>  {
>  	return atomic_read(&sbi->total_zombie_tree) +
> @@ -64,9 +57,6 @@ unsigned long f2fs_shrink_count(struct shrinker *shrink,
>  	/* shrink clean nat cache entries */
>  	count += __count_nat_entries(sbi);
>
> -	/* count free nids cache entries */
> -	count += __count_free_nids(sbi);
> -
>  	spin_lock(&f2fs_list_lock);
>  	p = p->next;
>  	mutex_unlock(&sbi->umount_mutex);
> @@ -111,10 +101,6 @@ unsigned long f2fs_shrink_scan(struct shrinker *shrink,
>  	if (freed < nr)
>  		freed += try_to_free_nats(sbi, nr - freed);
>
> -	/* shrink free nids cache entries */
> -	if (freed < nr)
> -		freed += try_to_free_nids(sbi, nr - freed);
> -
>  	spin_lock(&f2fs_list_lock);
>  	p = p->next;
>  	list_move_tail(&sbi->s_list, &f2fs_list);
> --
> 2.15.0.55.gc2ece9dc4de6