On 02/27, Chao Yu wrote:
> On 2017/2/26 2:34, Jaegeuk Kim wrote:
> > On 02/25, Chao Yu wrote:
> >> On 2017/2/24 6:54, Jaegeuk Kim wrote:
> >>> On 02/23, Chao Yu wrote:
> >>>> On 2017/2/14 10:06, Jaegeuk Kim wrote:
> > ...
> >>>
> >>>>> +static int scan_nat_bits(struct f2fs_sb_info *sbi)
> >>>>> +{
> >>>>> +       struct f2fs_nm_info *nm_i = NM_I(sbi);
> >>>>> +       struct page *page;
> >>>>> +       unsigned int i = 0;
> >>>>> +       nid_t target = FREE_NID_PAGES * NAT_ENTRY_PER_BLOCK;
> >>>>> +       nid_t nid;
> >>>>> +
> >>>>> +       if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
> >>>>> +               return -EAGAIN;
> >>>>> +
> >>>>> +       down_read(&nm_i->nat_tree_lock);
> >>>>> +check_empty:
> >>>>> +       i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
> >>>>> +       if (i >= nm_i->nat_blocks) {
> >>>>> +               i = 0;
> >>>>> +               goto check_partial;
> >>>>> +       }
> >>>>> +
> >>>>> +       for (nid = i * NAT_ENTRY_PER_BLOCK; nid < (i + 1) * 
> >>>>> NAT_ENTRY_PER_BLOCK;
> >>>>> +                                                                       
> >>>>> nid++) {
> >>>>> +               if (unlikely(nid >= nm_i->max_nid))
> >>>>> +                       break;
> >>>>> +               add_free_nid(sbi, nid, true);
> >>>>> +       }
> >>>>> +
> >>>>> +       if (nm_i->nid_cnt[FREE_NID_LIST] >= target)
> >>>>> +               goto out;
> >>>>> +       i++;
> >>>>> +       goto check_empty;
> >>>>> +
> >>>>> +check_partial:
> >>>>> +       i = find_next_zero_bit_le(nm_i->full_nat_bits, 
> >>>>> nm_i->nat_blocks, i);
> >>>>> +       if (i >= nm_i->nat_blocks) {
> >>>>> +               disable_nat_bits(sbi, true);
> >>>>
> >>>> Can this happen in real world? Should be a bug in somewhere?
> >>>
> >>> It happens, since current design handles full_nat_bits optionally in order
> >>> to avoid scanning a whole NAT page to set it back as 1 from 0.
> >>
> >> All 0 value in full_nat_bits means the NAT block has at least one free nid,
> >> right? so if we traverse all such NAT blocks, and still haven't collect 
> >> enough
> >> *target* number of free nids, we don't have to disable nat bit cache, we 
> >> can
> >> just break out here.
> > 
> > No, I'm seeing this case:
> > 1. mount with full=0, empty=0, which indicates some free nids.
> > 2. allocate all there-in free nids, but still full=0, empty=0.
> > 3. allocate more free nids.
> > ...
> > ... checkpoint makes full=1, empty=0
> > 
> > If there is no checkpoint and it consumes all the free nids, we can see all > > 0
> > value in full_nat_bits which is quite impossible. In that case, I'd prefer
> > to stop nat_bits and give fsck.f2fs to revive it back.
> 
> Yeah, I can understand that, but what I concern is when there is few free nids
> (< target), we still try to load nids of 8 NAT blocks until ending the loop of
> caching free nids, so it will be very easy to enter the case of disabling
> nid_bits cache here, so how about doing more check?
> 
> if (i >= nm_i->nat_blocks &&

This indicates full_nat_bits consists of all zeros, which means, whatever in
the next time, we need to return -EINVAL. If we do not disable it here, there is
no way to turn any bit into zero.

BTW, I'm trying to freeze all the patches for a pull request, so let me have
another patch for any pending issues which are not urgent for now as I think.

Thanks,

>       nm_i->nid_cnt[FREE_NID_LIST] != nm_i->available_nids)
>       disable_nat_bits
> 
> Thanks,
> 
> > 
> > Thanks,
> > 
> >>
> >> Thanks,
> >>
> >>>
> >>>>
> >>>>> +               return -EINVAL;
> >>>>> +       }
> >>>>> +
> >>>>> +       nid = i * NAT_ENTRY_PER_BLOCK;
> >>>>> +       page = get_current_nat_page(sbi, nid);
> >>>>> +       scan_nat_page(sbi, page, nid);
> >>>>> +       f2fs_put_page(page, 1);
> >>>>> +
> >>>>> +       if (nm_i->nid_cnt[FREE_NID_LIST] < target) {
> >>>>> +               i++;
> >>>>> +               goto check_partial;
> >>>>> +       }
> >>>>> +out:
> >>>>> +       up_read(&nm_i->nat_tree_lock);
> >>>>> +       return 0;
> >>>>> +}
> >>>>> +
> >>>>> +static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, 
> >>>>> bool mount)
> >>>>>  {
> >>>>>         struct f2fs_nm_info *nm_i = NM_I(sbi);
> >>>>>         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
> >>>>> @@ -1854,6 +1911,20 @@ static void __build_free_nids(struct 
> >>>>> f2fs_sb_info *sbi, bool sync)
> >>>>>         if (!sync && !available_free_memory(sbi, FREE_NIDS))
> >>>>>                 return;
> >>>>>  
> >>>>> +       /* try to find free nids with nat_bits */
> >>>>> +       if (!mount && !scan_nat_bits(sbi) && 
> >>>>> nm_i->nid_cnt[FREE_NID_LIST])
> >>>>> +               return;
> >>>>> +
> >>>>> +       /* find next valid candidate */
> >>>>
> >>>> This is just for mount case?
> >>>
> >>> Yup, it reuses free nids in dirty NAT blocks, so that we can make them as 
> >>> full
> >>> NAT pages.
> >>>
> >>> Thanks,
> >>>
> >>>>
> >>>>> +       if (is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) {
> >>>>> +               int idx = find_next_zero_bit_le(nm_i->full_nat_bits,
> >>>>> +                                       nm_i->nat_blocks, 0);
> >>>>> +               if (idx >= nm_i->nat_blocks)
> >>>>> +                       set_sbi_flag(sbi, SBI_NEED_FSCK);
> >>>>> +               else
> >>>>> +                       nid = idx * NAT_ENTRY_PER_BLOCK;
> >>>>> +       }
> >>>>> +
> >>>>>         /* readahead nat pages to be scanned */
> >>>>>         ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
> >>>>>                                                         META_NAT, true);
> >>>>> @@ -1896,10 +1967,10 @@ static void __build_free_nids(struct 
> >>>>> f2fs_sb_info *sbi, bool sync)
> >>>>>                                         nm_i->ra_nid_pages, META_NAT, 
> >>>>> false);
> >>>>>  }
> >>>>>  
> >>>>> -void build_free_nids(struct f2fs_sb_info *sbi, bool sync)
> >>>>> +void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
> >>>>>  {
> >>>>>         mutex_lock(&NM_I(sbi)->build_lock);
> >>>>> -       __build_free_nids(sbi, sync);
> >>>>> +       __build_free_nids(sbi, sync, mount);
> >>>>>         mutex_unlock(&NM_I(sbi)->build_lock);
> >>>>>  }
> >>>>>  
> >>>>> @@ -1941,7 +2012,7 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t 
> >>>>> *nid)
> >>>>>         spin_unlock(&nm_i->nid_list_lock);
> >>>>>  
> >>>>>         /* Let's scan nat pages and its caches to get free nids */
> >>>>> -       build_free_nids(sbi, true);
> >>>>> +       build_free_nids(sbi, true, false);
> >>>>>         goto retry;
> >>>>>  }
> >>>>>  
> >>>>> @@ -2233,8 +2304,39 @@ static void __adjust_nat_entry_set(struct 
> >>>>> nat_entry_set *nes,
> >>>>>         list_add_tail(&nes->set_list, head);
> >>>>>  }
> >>>>>  
> >>>>> +void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
> >>>>> +                                               struct page *page)
> >>>>> +{
> >>>>> +       struct f2fs_nm_info *nm_i = NM_I(sbi);
> >>>>> +       unsigned int nat_index = start_nid / NAT_ENTRY_PER_BLOCK;
> >>>>> +       struct f2fs_nat_block *nat_blk = page_address(page);
> >>>>> +       int valid = 0;
> >>>>> +       int i;
> >>>>> +
> >>>>> +       if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
> >>>>> +               return;
> >>>>> +
> >>>>> +       for (i = 0; i < NAT_ENTRY_PER_BLOCK; i++) {
> >>>>> +               if (start_nid == 0 && i == 0)
> >>>>> +                       valid++;
> >>>>> +               if (nat_blk->entries[i].block_addr)
> >>>>> +                       valid++;
> >>>>> +       }
> >>>>> +       if (valid == 0) {
> >>>>> +               test_and_set_bit_le(nat_index, nm_i->empty_nat_bits);
> >>>>> +               test_and_clear_bit_le(nat_index, nm_i->full_nat_bits);
> >>>>
> >>>> set_bit_le/clear_bit_le
> >>>>
> >>>>> +               return;
> >>>>> +       }
> >>>>> +
> >>>>> +       test_and_clear_bit_le(nat_index, nm_i->empty_nat_bits);
> >>>>
> >>>> ditto
> >>>>
> >>>>> +       if (valid == NAT_ENTRY_PER_BLOCK)
> >>>>> +               test_and_set_bit_le(nat_index, nm_i->full_nat_bits);
> >>>>> +       else
> >>>>> +               test_and_clear_bit_le(nat_index, nm_i->full_nat_bits);
> >>>>
> >>>> ditto
> >>>>
> >>>>> +}
> >>>>> +
> >>>>>  static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
> >>>>> -                                       struct nat_entry_set *set)
> >>>>> +               struct nat_entry_set *set, struct cp_control *cpc)
> >>>>>  {
> >>>>>         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
> >>>>>         struct f2fs_journal *journal = curseg->journal;
> >>>>> @@ -2249,7 +2351,8 @@ static void __flush_nat_entry_set(struct 
> >>>>> f2fs_sb_info *sbi,
> >>>>>          * #1, flush nat entries to journal in current hot data summary 
> >>>>> block.
> >>>>>          * #2, flush nat entries to nat page.
> >>>>>          */
> >>>>> -       if (!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
> >>>>> +       if (cpc->reason == CP_UMOUNT ||
> >>>>
> >>>> if ((cpc->reason == CP_UMOUNT && is_set_ckpt_flags(sbi, 
> >>>> CP_NAT_BITS_FLAG)) ||
> >>>>
> >>>>> +               !__has_cursum_space(journal, set->entry_cnt, 
> >>>>> NAT_JOURNAL))
> >>>>>                 to_journal = false;
> >>>>>  
> >>>>>         if (to_journal) {
> >>>>> @@ -2289,10 +2392,12 @@ static void __flush_nat_entry_set(struct 
> >>>>> f2fs_sb_info *sbi,
> >>>>>                 }
> >>>>>         }
> >>>>>  
> >>>>> -       if (to_journal)
> >>>>> +       if (to_journal) {
> >>>>>                 up_write(&curseg->journal_rwsem);
> >>>>> -       else
> >>>>> +       } else {
> >>>>> +               __update_nat_bits(sbi, start_nid, page);
> >>>>>                 f2fs_put_page(page, 1);
> >>>>> +       }
> >>>>>  
> >>>>>         f2fs_bug_on(sbi, set->entry_cnt);
> >>>>>  
> >>>>> @@ -2303,7 +2408,7 @@ static void __flush_nat_entry_set(struct 
> >>>>> f2fs_sb_info *sbi,
> >>>>>  /*
> >>>>>   * This function is called during the checkpointing process.
> >>>>>   */
> >>>>> -void flush_nat_entries(struct f2fs_sb_info *sbi)
> >>>>> +void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control 
> >>>>> *cpc)
> >>>>>  {
> >>>>>         struct f2fs_nm_info *nm_i = NM_I(sbi);
> >>>>>         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
> >>>>> @@ -2324,7 +2429,8 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
> >>>>>          * entries, remove all entries from journal and merge them
> >>>>>          * into nat entry set.
> >>>>>          */
> >>>>> -       if (!__has_cursum_space(journal, nm_i->dirty_nat_cnt, 
> >>>>> NAT_JOURNAL))
> >>>>> +       if (cpc->reason == CP_UMOUNT ||
> >>>>
> >>>> if ((cpc->reason == CP_UMOUNT && is_set_ckpt_flags(sbi, 
> >>>> CP_NAT_BITS_FLAG)) ||
> >>>>
> >>>>> +               !__has_cursum_space(journal, nm_i->dirty_nat_cnt, 
> >>>>> NAT_JOURNAL))
> >>>>>                 remove_nats_in_journal(sbi);
> >>>>>  
> >>>>>         while ((found = __gang_lookup_nat_set(nm_i,
> >>>>> @@ -2338,27 +2444,72 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
> >>>>>  
> >>>>>         /* flush dirty nats in nat entry set */
> >>>>>         list_for_each_entry_safe(set, tmp, &sets, set_list)
> >>>>> -               __flush_nat_entry_set(sbi, set);
> >>>>> +               __flush_nat_entry_set(sbi, set, cpc);
> >>>>>  
> >>>>>         up_write(&nm_i->nat_tree_lock);
> >>>>>  
> >>>>>         f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
> >>>>>  }
> >>>>>  
> >>>>> +static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
> >>>>> +{
> >>>>> +       struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
> >>>>> +       struct f2fs_nm_info *nm_i = NM_I(sbi);
> >>>>> +       unsigned int nat_bits_bytes = nm_i->nat_blocks / BITS_PER_BYTE;
> >>>>> +       unsigned int i;
> >>>>> +       __u64 cp_ver = le64_to_cpu(ckpt->checkpoint_ver);
> >>>>
> >>>> __u64 cp_ver = cur_cp_version(ckpt);
> >>>>
> >>>> Thanks,
> >>>>
> >>>>> +       size_t crc_offset = le32_to_cpu(ckpt->checksum_offset);
> >>>>> +       __u64 crc = le32_to_cpu(*((__le32 *)
> >>>>> +                               ((unsigned char *)ckpt + crc_offset)));
> >>>>> +       block_t nat_bits_addr;
> >>>>> +
> >>>>> +       if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
> >>>>> +               return 0;
> >>>>> +
> >>>>> +       nm_i->nat_bits_blocks = F2FS_BYTES_TO_BLK((nat_bits_bytes << 1) 
> >>>>> + 8 +
> >>>>> +                                               F2FS_BLKSIZE - 1);
> >>>>> +       nm_i->nat_bits = kzalloc(nm_i->nat_bits_blocks << 
> >>>>> F2FS_BLKSIZE_BITS,
> >>>>> +                                               GFP_KERNEL);
> >>>>> +       if (!nm_i->nat_bits)
> >>>>> +               return -ENOMEM;
> >>>>> +
> >>>>> +       nat_bits_addr = __start_cp_addr(sbi) + sbi->blocks_per_seg -
> >>>>> +                                               nm_i->nat_bits_blocks;
> >>>>> +       for (i = 0; i < nm_i->nat_bits_blocks; i++) {
> >>>>> +               struct page *page = get_meta_page(sbi, nat_bits_addr++);
> >>>>> +
> >>>>> +               memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS),
> >>>>> +                                       page_address(page), 
> >>>>> F2FS_BLKSIZE);
> >>>>> +               f2fs_put_page(page, 1);
> >>>>> +       }
> >>>>> +
> >>>>> +       cp_ver |= (crc << 32);
> >>>>> +       if (cpu_to_le64(cp_ver) != *(__le64 *)nm_i->nat_bits) {
> >>>>> +               disable_nat_bits(sbi, true);
> >>>>> +               return 0;
> >>>>> +       }
> >>>>> +
> >>>>> +       nm_i->full_nat_bits = nm_i->nat_bits + 8;
> >>>>> +       nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes;
> >>>>> +
> >>>>> +       f2fs_msg(sbi->sb, KERN_NOTICE, "Found nat_bits in checkpoint");
> >>>>> +       return 0;
> >>>>> +}
> >>>>> +
> >>>>>  static int init_node_manager(struct f2fs_sb_info *sbi)
> >>>>>  {
> >>>>>         struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
> >>>>>         struct f2fs_nm_info *nm_i = NM_I(sbi);
> >>>>>         unsigned char *version_bitmap;
> >>>>> -       unsigned int nat_segs, nat_blocks;
> >>>>> +       unsigned int nat_segs;
> >>>>> +       int err;
> >>>>>  
> >>>>>         nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr);
> >>>>>  
> >>>>>         /* segment_count_nat includes pair segment so divide to 2. */
> >>>>>         nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
> >>>>> -       nat_blocks = nat_segs << 
> >>>>> le32_to_cpu(sb_raw->log_blocks_per_seg);
> >>>>> -
> >>>>> -       nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
> >>>>> +       nm_i->nat_blocks = nat_segs << 
> >>>>> le32_to_cpu(sb_raw->log_blocks_per_seg);
> >>>>> +       nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nm_i->nat_blocks;
> >>>>>  
> >>>>>         /* not used nids: 0, node, meta, (and root counted as valid 
> >>>>> node) */
> >>>>>         nm_i->available_nids = nm_i->max_nid - 
> >>>>> sbi->total_valid_node_count -
> >>>>> @@ -2392,6 +2543,10 @@ static int init_node_manager(struct f2fs_sb_info 
> >>>>> *sbi)
> >>>>>         if (!nm_i->nat_bitmap)
> >>>>>                 return -ENOMEM;
> >>>>>  
> >>>>> +       err = __get_nat_bitmaps(sbi);
> >>>>> +       if (err)
> >>>>> +               return err;
> >>>>> +
> >>>>>  #ifdef CONFIG_F2FS_CHECK_FS
> >>>>>         nm_i->nat_bitmap_mir = kmemdup(version_bitmap, 
> >>>>> nm_i->bitmap_size,
> >>>>>                                         GFP_KERNEL);
> >>>>> @@ -2414,7 +2569,7 @@ int build_node_manager(struct f2fs_sb_info *sbi)
> >>>>>         if (err)
> >>>>>                 return err;
> >>>>>  
> >>>>> -       build_free_nids(sbi, true);
> >>>>> +       build_free_nids(sbi, true, true);
> >>>>>         return 0;
> >>>>>  }
> >>>>>  
> >>>>> @@ -2473,6 +2628,7 @@ void destroy_node_manager(struct f2fs_sb_info 
> >>>>> *sbi)
> >>>>>         up_write(&nm_i->nat_tree_lock);
> >>>>>  
> >>>>>         kfree(nm_i->nat_bitmap);
> >>>>> +       kfree(nm_i->nat_bits);
> >>>>>  #ifdef CONFIG_F2FS_CHECK_FS
> >>>>>         kfree(nm_i->nat_bitmap_mir);
> >>>>>  #endif
> >>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> >>>>> index df2ff5cfe8f4..8e1ec248c653 100644
> >>>>> --- a/fs/f2fs/segment.c
> >>>>> +++ b/fs/f2fs/segment.c
> >>>>> @@ -386,7 +386,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
> >>>>>         if (!available_free_memory(sbi, FREE_NIDS))
> >>>>>                 try_to_free_nids(sbi, MAX_FREE_NIDS);
> >>>>>         else
> >>>>> -               build_free_nids(sbi, false);
> >>>>> +               build_free_nids(sbi, false, false);
> >>>>>  
> >>>>>         if (!is_idle(sbi))
> >>>>>                 return;
> >>>>> diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
> >>>>> index f0748524ca8c..1c92ace2e8f8 100644
> >>>>> --- a/include/linux/f2fs_fs.h
> >>>>> +++ b/include/linux/f2fs_fs.h
> >>>>> @@ -114,6 +114,7 @@ struct f2fs_super_block {
> >>>>>  /*
> >>>>>   * For checkpoint
> >>>>>   */
> >>>>> +#define CP_NAT_BITS_FLAG       0x00000080
> >>>>>  #define CP_CRC_RECOVERY_FLAG   0x00000040
> >>>>>  #define CP_FASTBOOT_FLAG       0x00000020
> >>>>>  #define CP_FSCK_FLAG           0x00000010
> >>>>>
> >>>>
> >>>>
> >>>> ------------------------------------------------------------------------------
> >>>> Check out the vibrant tech community on one of the world's most
> >>>> engaging tech sites, SlashDot.org! http://sdm.link/slashdot
> >>>> _______________________________________________
> >>>> Linux-f2fs-devel mailing list
> >>>> linux-f2fs-de...@lists.sourceforge.net
> >>>> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
> >>>
> >>> .
> >>>
> >>
> >>
> >> ------------------------------------------------------------------------------
> >> Check out the vibrant tech community on one of the world's most
> >> engaging tech sites, SlashDot.org! http://sdm.link/slashdot
> >> _______________________________________________
> >> Linux-f2fs-devel mailing list
> >> linux-f2fs-de...@lists.sourceforge.net
> >> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
> > 
> > .
> > 
> 
> 
> ------------------------------------------------------------------------------
> Check out the vibrant tech community on one of the world's most
> engaging tech sites, SlashDot.org! http://sdm.link/slashdot
> _______________________________________________
> Linux-f2fs-devel mailing list
> linux-f2fs-de...@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

Reply via email to