Re: [f2fs-dev] [PATCH 2/3] f2fs: add bitmaps for empty or full NAT blocks

2017-02-28 Thread Chao Yu
On 2017/2/28 6:19, Jaegeuk Kim wrote:
> On 02/27, Chao Yu wrote:
>> On 2017/2/26 2:34, Jaegeuk Kim wrote:
>>> On 02/25, Chao Yu wrote:
 On 2017/2/24 6:54, Jaegeuk Kim wrote:
> On 02/23, Chao Yu wrote:
>> On 2017/2/14 10:06, Jaegeuk Kim wrote:
>>> ...
>
>>> +static int scan_nat_bits(struct f2fs_sb_info *sbi)
>>> +{
>>> +   struct f2fs_nm_info *nm_i = NM_I(sbi);
>>> +   struct page *page;
>>> +   unsigned int i = 0;
>>> +   nid_t target = FREE_NID_PAGES * NAT_ENTRY_PER_BLOCK;
>>> +   nid_t nid;
>>> +
>>> +   if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
>>> +   return -EAGAIN;
>>> +
>>> +   down_read(_i->nat_tree_lock);
>>> +check_empty:
>>> +   i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
>>> +   if (i >= nm_i->nat_blocks) {
>>> +   i = 0;
>>> +   goto check_partial;
>>> +   }
>>> +
>>> +   for (nid = i * NAT_ENTRY_PER_BLOCK; nid < (i + 1) * 
>>> NAT_ENTRY_PER_BLOCK;
>>> +   
>>> nid++) {
>>> +   if (unlikely(nid >= nm_i->max_nid))
>>> +   break;
>>> +   add_free_nid(sbi, nid, true);
>>> +   }
>>> +
>>> +   if (nm_i->nid_cnt[FREE_NID_LIST] >= target)
>>> +   goto out;
>>> +   i++;
>>> +   goto check_empty;
>>> +
>>> +check_partial:
>>> +   i = find_next_zero_bit_le(nm_i->full_nat_bits, 
>>> nm_i->nat_blocks, i);
>>> +   if (i >= nm_i->nat_blocks) {
>>> +   disable_nat_bits(sbi, true);
>>
>> Can this happen in real world? Should be a bug in somewhere?
>
> It happens, since current design handles full_nat_bits optionally in order
> to avoid scanning a whole NAT page to set it back as 1 from 0.

 All 0 value in full_nat_bits means the NAT block has at least one free nid,
 right? so if we traverse all such NAT blocks, and still haven't collect 
 enough
 *target* number of free nids, we don't have to disable nat bit cache, we 
 can
 just break out here.
>>>
>>> No, I'm seeing this case:
>>> 1. mount with full=0, empty=0, which indicates some free nids.
>>> 2. allocate all there-in free nids, but still full=0, empty=0.
>>> 3. allocate more free nids.
>>> ...
>>> ... checkpoint makes full=1, empty=0
>>>
>>> If there is no checkpoint and it consumes all the free nids, we can see all >>> 0
>>> value in full_nat_bits which is quite impossible. In that case, I'd prefer
>>> to stop nat_bits and give fsck.f2fs to revive it back.
>>
>> Yeah, I can understand that, but what I concern is when there is few free 
>> nids
>> (< target), we still try to load nids of 8 NAT blocks until ending the loop 
>> of
>> caching free nids, so it will be very easy to enter the case of disabling
>> nid_bits cache here, so how about doing more check?
>>
>> if (i >= nm_i->nat_blocks &&
> 
> This indicates full_nat_bits consists of all zeros, which means, whatever in
> the next time, we need to return -EINVAL. If we do not disable it here, there 
> is
> no way to turn any bit into zero.

We can expect late checkpoint will refresh full_nat_bits, __update_nat_bits will
turn bit to one or zero there.

> 
> BTW, I'm trying to freeze all the patches for a pull request, so let me have
> another patch for any pending issues which are not urgent for now as I think.

Got it. I will send you patch if I find explicit bug. :)

Thanks,

> 
> Thanks,
> 
>>  nm_i->nid_cnt[FREE_NID_LIST] != nm_i->available_nids)
>>  disable_nat_bits
>>
>> Thanks,
>>
>>>
>>> Thanks,
>>>

 Thanks,

>
>>
>>> +   return -EINVAL;
>>> +   }
>>> +
>>> +   nid = i * NAT_ENTRY_PER_BLOCK;
>>> +   page = get_current_nat_page(sbi, nid);
>>> +   scan_nat_page(sbi, page, nid);
>>> +   f2fs_put_page(page, 1);
>>> +
>>> +   if (nm_i->nid_cnt[FREE_NID_LIST] < target) {
>>> +   i++;
>>> +   goto check_partial;
>>> +   }
>>> +out:
>>> +   up_read(_i->nat_tree_lock);
>>> +   return 0;
>>> +}
>>> +
>>> +static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, 
>>> bool mount)
>>>  {
>>> struct f2fs_nm_info *nm_i = NM_I(sbi);
>>> struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
>>> @@ -1854,6 +1911,20 @@ static void __build_free_nids(struct 
>>> f2fs_sb_info *sbi, bool sync)
>>> if (!sync && !available_free_memory(sbi, FREE_NIDS))
>>> return;
>>>  
>>> +   /* try to find free nids with nat_bits */
>>> +   if (!mount && !scan_nat_bits(sbi) && 
>>> nm_i->nid_cnt[FREE_NID_LIST])
>>> +   return;
>>> +
>>> +   /* find 

Re: [f2fs-dev] [PATCH 2/3] f2fs: add bitmaps for empty or full NAT blocks

2017-02-28 Thread Chao Yu
On 2017/2/28 6:19, Jaegeuk Kim wrote:
> On 02/27, Chao Yu wrote:
>> On 2017/2/26 2:34, Jaegeuk Kim wrote:
>>> On 02/25, Chao Yu wrote:
 On 2017/2/24 6:54, Jaegeuk Kim wrote:
> On 02/23, Chao Yu wrote:
>> On 2017/2/14 10:06, Jaegeuk Kim wrote:
>>> ...
>
>>> +static int scan_nat_bits(struct f2fs_sb_info *sbi)
>>> +{
>>> +   struct f2fs_nm_info *nm_i = NM_I(sbi);
>>> +   struct page *page;
>>> +   unsigned int i = 0;
>>> +   nid_t target = FREE_NID_PAGES * NAT_ENTRY_PER_BLOCK;
>>> +   nid_t nid;
>>> +
>>> +   if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
>>> +   return -EAGAIN;
>>> +
>>> +   down_read(_i->nat_tree_lock);
>>> +check_empty:
>>> +   i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
>>> +   if (i >= nm_i->nat_blocks) {
>>> +   i = 0;
>>> +   goto check_partial;
>>> +   }
>>> +
>>> +   for (nid = i * NAT_ENTRY_PER_BLOCK; nid < (i + 1) * 
>>> NAT_ENTRY_PER_BLOCK;
>>> +   
>>> nid++) {
>>> +   if (unlikely(nid >= nm_i->max_nid))
>>> +   break;
>>> +   add_free_nid(sbi, nid, true);
>>> +   }
>>> +
>>> +   if (nm_i->nid_cnt[FREE_NID_LIST] >= target)
>>> +   goto out;
>>> +   i++;
>>> +   goto check_empty;
>>> +
>>> +check_partial:
>>> +   i = find_next_zero_bit_le(nm_i->full_nat_bits, 
>>> nm_i->nat_blocks, i);
>>> +   if (i >= nm_i->nat_blocks) {
>>> +   disable_nat_bits(sbi, true);
>>
>> Can this happen in real world? Should be a bug in somewhere?
>
> It happens, since current design handles full_nat_bits optionally in order
> to avoid scanning a whole NAT page to set it back as 1 from 0.

 All 0 value in full_nat_bits means the NAT block has at least one free nid,
 right? so if we traverse all such NAT blocks, and still haven't collect 
 enough
 *target* number of free nids, we don't have to disable nat bit cache, we 
 can
 just break out here.
>>>
>>> No, I'm seeing this case:
>>> 1. mount with full=0, empty=0, which indicates some free nids.
>>> 2. allocate all there-in free nids, but still full=0, empty=0.
>>> 3. allocate more free nids.
>>> ...
>>> ... checkpoint makes full=1, empty=0
>>>
>>> If there is no checkpoint and it consumes all the free nids, we can see all >>> 0
>>> value in full_nat_bits which is quite impossible. In that case, I'd prefer
>>> to stop nat_bits and give fsck.f2fs to revive it back.
>>
>> Yeah, I can understand that, but what I concern is when there is few free 
>> nids
>> (< target), we still try to load nids of 8 NAT blocks until ending the loop 
>> of
>> caching free nids, so it will be very easy to enter the case of disabling
>> nid_bits cache here, so how about doing more check?
>>
>> if (i >= nm_i->nat_blocks &&
> 
> This indicates full_nat_bits consists of all zeros, which means, whatever in
> the next time, we need to return -EINVAL. If we do not disable it here, there 
> is
> no way to turn any bit into zero.

We can expect late checkpoint will refresh full_nat_bits, __update_nat_bits will
turn bit to one or zero there.

> 
> BTW, I'm trying to freeze all the patches for a pull request, so let me have
> another patch for any pending issues which are not urgent for now as I think.

Got it. I will send you patch if I find explicit bug. :)

Thanks,

> 
> Thanks,
> 
>>  nm_i->nid_cnt[FREE_NID_LIST] != nm_i->available_nids)
>>  disable_nat_bits
>>
>> Thanks,
>>
>>>
>>> Thanks,
>>>

 Thanks,

>
>>
>>> +   return -EINVAL;
>>> +   }
>>> +
>>> +   nid = i * NAT_ENTRY_PER_BLOCK;
>>> +   page = get_current_nat_page(sbi, nid);
>>> +   scan_nat_page(sbi, page, nid);
>>> +   f2fs_put_page(page, 1);
>>> +
>>> +   if (nm_i->nid_cnt[FREE_NID_LIST] < target) {
>>> +   i++;
>>> +   goto check_partial;
>>> +   }
>>> +out:
>>> +   up_read(_i->nat_tree_lock);
>>> +   return 0;
>>> +}
>>> +
>>> +static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, 
>>> bool mount)
>>>  {
>>> struct f2fs_nm_info *nm_i = NM_I(sbi);
>>> struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
>>> @@ -1854,6 +1911,20 @@ static void __build_free_nids(struct 
>>> f2fs_sb_info *sbi, bool sync)
>>> if (!sync && !available_free_memory(sbi, FREE_NIDS))
>>> return;
>>>  
>>> +   /* try to find free nids with nat_bits */
>>> +   if (!mount && !scan_nat_bits(sbi) && 
>>> nm_i->nid_cnt[FREE_NID_LIST])
>>> +   return;
>>> +
>>> +   /* find 

Re: [f2fs-dev] [PATCH 2/3] f2fs: add bitmaps for empty or full NAT blocks

2017-02-27 Thread Jaegeuk Kim
On 02/27, Chao Yu wrote:
> On 2017/2/26 2:34, Jaegeuk Kim wrote:
> > On 02/25, Chao Yu wrote:
> >> On 2017/2/24 6:54, Jaegeuk Kim wrote:
> >>> On 02/23, Chao Yu wrote:
>  On 2017/2/14 10:06, Jaegeuk Kim wrote:
> > ...
> >>>
> > +static int scan_nat_bits(struct f2fs_sb_info *sbi)
> > +{
> > +   struct f2fs_nm_info *nm_i = NM_I(sbi);
> > +   struct page *page;
> > +   unsigned int i = 0;
> > +   nid_t target = FREE_NID_PAGES * NAT_ENTRY_PER_BLOCK;
> > +   nid_t nid;
> > +
> > +   if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
> > +   return -EAGAIN;
> > +
> > +   down_read(_i->nat_tree_lock);
> > +check_empty:
> > +   i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
> > +   if (i >= nm_i->nat_blocks) {
> > +   i = 0;
> > +   goto check_partial;
> > +   }
> > +
> > +   for (nid = i * NAT_ENTRY_PER_BLOCK; nid < (i + 1) * 
> > NAT_ENTRY_PER_BLOCK;
> > +   
> > nid++) {
> > +   if (unlikely(nid >= nm_i->max_nid))
> > +   break;
> > +   add_free_nid(sbi, nid, true);
> > +   }
> > +
> > +   if (nm_i->nid_cnt[FREE_NID_LIST] >= target)
> > +   goto out;
> > +   i++;
> > +   goto check_empty;
> > +
> > +check_partial:
> > +   i = find_next_zero_bit_le(nm_i->full_nat_bits, 
> > nm_i->nat_blocks, i);
> > +   if (i >= nm_i->nat_blocks) {
> > +   disable_nat_bits(sbi, true);
> 
>  Can this happen in real world? Should be a bug in somewhere?
> >>>
> >>> It happens, since current design handles full_nat_bits optionally in order
> >>> to avoid scanning a whole NAT page to set it back as 1 from 0.
> >>
> >> All 0 value in full_nat_bits means the NAT block has at least one free nid,
> >> right? so if we traverse all such NAT blocks, and still haven't collect 
> >> enough
> >> *target* number of free nids, we don't have to disable nat bit cache, we 
> >> can
> >> just break out here.
> > 
> > No, I'm seeing this case:
> > 1. mount with full=0, empty=0, which indicates some free nids.
> > 2. allocate all there-in free nids, but still full=0, empty=0.
> > 3. allocate more free nids.
> > ...
> > ... checkpoint makes full=1, empty=0
> > 
> > If there is no checkpoint and it consumes all the free nids, we can see all > > 0
> > value in full_nat_bits which is quite impossible. In that case, I'd prefer
> > to stop nat_bits and give fsck.f2fs to revive it back.
> 
> Yeah, I can understand that, but what I concern is when there is few free nids
> (< target), we still try to load nids of 8 NAT blocks until ending the loop of
> caching free nids, so it will be very easy to enter the case of disabling
> nid_bits cache here, so how about doing more check?
> 
> if (i >= nm_i->nat_blocks &&

This indicates full_nat_bits consists of all zeros, which means, whatever in
the next time, we need to return -EINVAL. If we do not disable it here, there is
no way to turn any bit into zero.

BTW, I'm trying to freeze all the patches for a pull request, so let me have
another patch for any pending issues which are not urgent for now as I think.

Thanks,

>   nm_i->nid_cnt[FREE_NID_LIST] != nm_i->available_nids)
>   disable_nat_bits
> 
> Thanks,
> 
> > 
> > Thanks,
> > 
> >>
> >> Thanks,
> >>
> >>>
> 
> > +   return -EINVAL;
> > +   }
> > +
> > +   nid = i * NAT_ENTRY_PER_BLOCK;
> > +   page = get_current_nat_page(sbi, nid);
> > +   scan_nat_page(sbi, page, nid);
> > +   f2fs_put_page(page, 1);
> > +
> > +   if (nm_i->nid_cnt[FREE_NID_LIST] < target) {
> > +   i++;
> > +   goto check_partial;
> > +   }
> > +out:
> > +   up_read(_i->nat_tree_lock);
> > +   return 0;
> > +}
> > +
> > +static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, 
> > bool mount)
> >  {
> > struct f2fs_nm_info *nm_i = NM_I(sbi);
> > struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
> > @@ -1854,6 +1911,20 @@ static void __build_free_nids(struct 
> > f2fs_sb_info *sbi, bool sync)
> > if (!sync && !available_free_memory(sbi, FREE_NIDS))
> > return;
> >  
> > +   /* try to find free nids with nat_bits */
> > +   if (!mount && !scan_nat_bits(sbi) && 
> > nm_i->nid_cnt[FREE_NID_LIST])
> > +   return;
> > +
> > +   /* find next valid candidate */
> 
>  This is just for mount case?
> >>>
> >>> Yup, it reuses free nids in dirty NAT blocks, so that we can make them as 
> >>> full
> >>> NAT pages.
> >>>
> >>> Thanks,
> >>>
> 
> > +   if (is_set_ckpt_flags(sbi, 

Re: [f2fs-dev] [PATCH 2/3] f2fs: add bitmaps for empty or full NAT blocks

2017-02-27 Thread Jaegeuk Kim
On 02/27, Chao Yu wrote:
> On 2017/2/26 2:34, Jaegeuk Kim wrote:
> > On 02/25, Chao Yu wrote:
> >> On 2017/2/24 6:54, Jaegeuk Kim wrote:
> >>> On 02/23, Chao Yu wrote:
>  On 2017/2/14 10:06, Jaegeuk Kim wrote:
> > ...
> >>>
> > +static int scan_nat_bits(struct f2fs_sb_info *sbi)
> > +{
> > +   struct f2fs_nm_info *nm_i = NM_I(sbi);
> > +   struct page *page;
> > +   unsigned int i = 0;
> > +   nid_t target = FREE_NID_PAGES * NAT_ENTRY_PER_BLOCK;
> > +   nid_t nid;
> > +
> > +   if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
> > +   return -EAGAIN;
> > +
> > +   down_read(_i->nat_tree_lock);
> > +check_empty:
> > +   i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
> > +   if (i >= nm_i->nat_blocks) {
> > +   i = 0;
> > +   goto check_partial;
> > +   }
> > +
> > +   for (nid = i * NAT_ENTRY_PER_BLOCK; nid < (i + 1) * 
> > NAT_ENTRY_PER_BLOCK;
> > +   
> > nid++) {
> > +   if (unlikely(nid >= nm_i->max_nid))
> > +   break;
> > +   add_free_nid(sbi, nid, true);
> > +   }
> > +
> > +   if (nm_i->nid_cnt[FREE_NID_LIST] >= target)
> > +   goto out;
> > +   i++;
> > +   goto check_empty;
> > +
> > +check_partial:
> > +   i = find_next_zero_bit_le(nm_i->full_nat_bits, 
> > nm_i->nat_blocks, i);
> > +   if (i >= nm_i->nat_blocks) {
> > +   disable_nat_bits(sbi, true);
> 
>  Can this happen in real world? Should be a bug in somewhere?
> >>>
> >>> It happens, since current design handles full_nat_bits optionally in order
> >>> to avoid scanning a whole NAT page to set it back as 1 from 0.
> >>
> >> All 0 value in full_nat_bits means the NAT block has at least one free nid,
> >> right? so if we traverse all such NAT blocks, and still haven't collect 
> >> enough
> >> *target* number of free nids, we don't have to disable nat bit cache, we 
> >> can
> >> just break out here.
> > 
> > No, I'm seeing this case:
> > 1. mount with full=0, empty=0, which indicates some free nids.
> > 2. allocate all there-in free nids, but still full=0, empty=0.
> > 3. allocate more free nids.
> > ...
> > ... checkpoint makes full=1, empty=0
> > 
> > If there is no checkpoint and it consumes all the free nids, we can see all > > 0
> > value in full_nat_bits which is quite impossible. In that case, I'd prefer
> > to stop nat_bits and give fsck.f2fs to revive it back.
> 
> Yeah, I can understand that, but what I concern is when there is few free nids
> (< target), we still try to load nids of 8 NAT blocks until ending the loop of
> caching free nids, so it will be very easy to enter the case of disabling
> nid_bits cache here, so how about doing more check?
> 
> if (i >= nm_i->nat_blocks &&

This indicates full_nat_bits consists of all zeros, which means, whatever in
the next time, we need to return -EINVAL. If we do not disable it here, there is
no way to turn any bit into zero.

BTW, I'm trying to freeze all the patches for a pull request, so let me have
another patch for any pending issues which are not urgent for now as I think.

Thanks,

>   nm_i->nid_cnt[FREE_NID_LIST] != nm_i->available_nids)
>   disable_nat_bits
> 
> Thanks,
> 
> > 
> > Thanks,
> > 
> >>
> >> Thanks,
> >>
> >>>
> 
> > +   return -EINVAL;
> > +   }
> > +
> > +   nid = i * NAT_ENTRY_PER_BLOCK;
> > +   page = get_current_nat_page(sbi, nid);
> > +   scan_nat_page(sbi, page, nid);
> > +   f2fs_put_page(page, 1);
> > +
> > +   if (nm_i->nid_cnt[FREE_NID_LIST] < target) {
> > +   i++;
> > +   goto check_partial;
> > +   }
> > +out:
> > +   up_read(_i->nat_tree_lock);
> > +   return 0;
> > +}
> > +
> > +static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, 
> > bool mount)
> >  {
> > struct f2fs_nm_info *nm_i = NM_I(sbi);
> > struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
> > @@ -1854,6 +1911,20 @@ static void __build_free_nids(struct 
> > f2fs_sb_info *sbi, bool sync)
> > if (!sync && !available_free_memory(sbi, FREE_NIDS))
> > return;
> >  
> > +   /* try to find free nids with nat_bits */
> > +   if (!mount && !scan_nat_bits(sbi) && 
> > nm_i->nid_cnt[FREE_NID_LIST])
> > +   return;
> > +
> > +   /* find next valid candidate */
> 
>  This is just for mount case?
> >>>
> >>> Yup, it reuses free nids in dirty NAT blocks, so that we can make them as 
> >>> full
> >>> NAT pages.
> >>>
> >>> Thanks,
> >>>
> 
> > +   if (is_set_ckpt_flags(sbi, 

Re: [f2fs-dev] [PATCH 2/3] f2fs: add bitmaps for empty or full NAT blocks

2017-02-26 Thread Chao Yu
On 2017/2/26 2:34, Jaegeuk Kim wrote:
> On 02/25, Chao Yu wrote:
>> On 2017/2/24 6:54, Jaegeuk Kim wrote:
>>> On 02/23, Chao Yu wrote:
 On 2017/2/14 10:06, Jaegeuk Kim wrote:
> ...
>>>
> +static int scan_nat_bits(struct f2fs_sb_info *sbi)
> +{
> + struct f2fs_nm_info *nm_i = NM_I(sbi);
> + struct page *page;
> + unsigned int i = 0;
> + nid_t target = FREE_NID_PAGES * NAT_ENTRY_PER_BLOCK;
> + nid_t nid;
> +
> + if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
> + return -EAGAIN;
> +
> + down_read(_i->nat_tree_lock);
> +check_empty:
> + i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
> + if (i >= nm_i->nat_blocks) {
> + i = 0;
> + goto check_partial;
> + }
> +
> + for (nid = i * NAT_ENTRY_PER_BLOCK; nid < (i + 1) * NAT_ENTRY_PER_BLOCK;
> + nid++) {
> + if (unlikely(nid >= nm_i->max_nid))
> + break;
> + add_free_nid(sbi, nid, true);
> + }
> +
> + if (nm_i->nid_cnt[FREE_NID_LIST] >= target)
> + goto out;
> + i++;
> + goto check_empty;
> +
> +check_partial:
> + i = find_next_zero_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
> + if (i >= nm_i->nat_blocks) {
> + disable_nat_bits(sbi, true);

 Can this happen in real world? Should be a bug in somewhere?
>>>
>>> It happens, since current design handles full_nat_bits optionally in order
>>> to avoid scanning a whole NAT page to set it back as 1 from 0.
>>
>> All 0 value in full_nat_bits means the NAT block has at least one free nid,
>> right? so if we traverse all such NAT blocks, and still haven't collect 
>> enough
>> *target* number of free nids, we don't have to disable nat bit cache, we can
>> just break out here.
> 
> No, I'm seeing this case:
> 1. mount with full=0, empty=0, which indicates some free nids.
> 2. allocate all there-in free nids, but still full=0, empty=0.
> 3. allocate more free nids.
> ...
> ... checkpoint makes full=1, empty=0
> 
> If there is no checkpoint and it consumes all the free nids, we can see all 0
> value in full_nat_bits which is quite impossible. In that case, I'd prefer
> to stop nat_bits and give fsck.f2fs to revive it back.

Yeah, I can understand that, but what I concern is when there is few free nids
(< target), we still try to load nids of 8 NAT blocks until ending the loop of
caching free nids, so it will be very easy to enter the case of disabling
nid_bits cache here, so how about doing more check?

if (i >= nm_i->nat_blocks &&
nm_i->nid_cnt[FREE_NID_LIST] != nm_i->available_nids)
disable_nat_bits

Thanks,

> 
> Thanks,
> 
>>
>> Thanks,
>>
>>>

> + return -EINVAL;
> + }
> +
> + nid = i * NAT_ENTRY_PER_BLOCK;
> + page = get_current_nat_page(sbi, nid);
> + scan_nat_page(sbi, page, nid);
> + f2fs_put_page(page, 1);
> +
> + if (nm_i->nid_cnt[FREE_NID_LIST] < target) {
> + i++;
> + goto check_partial;
> + }
> +out:
> + up_read(_i->nat_tree_lock);
> + return 0;
> +}
> +
> +static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool 
> mount)
>  {
>   struct f2fs_nm_info *nm_i = NM_I(sbi);
>   struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
> @@ -1854,6 +1911,20 @@ static void __build_free_nids(struct f2fs_sb_info 
> *sbi, bool sync)
>   if (!sync && !available_free_memory(sbi, FREE_NIDS))
>   return;
>  
> + /* try to find free nids with nat_bits */
> + if (!mount && !scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST])
> + return;
> +
> + /* find next valid candidate */

 This is just for mount case?
>>>
>>> Yup, it reuses free nids in dirty NAT blocks, so that we can make them as 
>>> full
>>> NAT pages.
>>>
>>> Thanks,
>>>

> + if (is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) {
> + int idx = find_next_zero_bit_le(nm_i->full_nat_bits,
> + nm_i->nat_blocks, 0);
> + if (idx >= nm_i->nat_blocks)
> + set_sbi_flag(sbi, SBI_NEED_FSCK);
> + else
> + nid = idx * NAT_ENTRY_PER_BLOCK;
> + }
> +
>   /* readahead nat pages to be scanned */
>   ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
>   META_NAT, true);
> @@ -1896,10 +1967,10 @@ static void __build_free_nids(struct f2fs_sb_info 
> *sbi, bool sync)
>   nm_i->ra_nid_pages, META_NAT, false);
>  }
>  
> -void build_free_nids(struct f2fs_sb_info *sbi, bool sync)
> +void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
>  {
>   mutex_lock(_I(sbi)->build_lock);
> - __build_free_nids(sbi, 

Re: [f2fs-dev] [PATCH 2/3] f2fs: add bitmaps for empty or full NAT blocks

2017-02-26 Thread Chao Yu
On 2017/2/26 2:34, Jaegeuk Kim wrote:
> On 02/25, Chao Yu wrote:
>> On 2017/2/24 6:54, Jaegeuk Kim wrote:
>>> On 02/23, Chao Yu wrote:
 On 2017/2/14 10:06, Jaegeuk Kim wrote:
> ...
>>>
> +static int scan_nat_bits(struct f2fs_sb_info *sbi)
> +{
> + struct f2fs_nm_info *nm_i = NM_I(sbi);
> + struct page *page;
> + unsigned int i = 0;
> + nid_t target = FREE_NID_PAGES * NAT_ENTRY_PER_BLOCK;
> + nid_t nid;
> +
> + if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
> + return -EAGAIN;
> +
> + down_read(_i->nat_tree_lock);
> +check_empty:
> + i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
> + if (i >= nm_i->nat_blocks) {
> + i = 0;
> + goto check_partial;
> + }
> +
> + for (nid = i * NAT_ENTRY_PER_BLOCK; nid < (i + 1) * NAT_ENTRY_PER_BLOCK;
> + nid++) {
> + if (unlikely(nid >= nm_i->max_nid))
> + break;
> + add_free_nid(sbi, nid, true);
> + }
> +
> + if (nm_i->nid_cnt[FREE_NID_LIST] >= target)
> + goto out;
> + i++;
> + goto check_empty;
> +
> +check_partial:
> + i = find_next_zero_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
> + if (i >= nm_i->nat_blocks) {
> + disable_nat_bits(sbi, true);

 Can this happen in real world? Should be a bug in somewhere?
>>>
>>> It happens, since current design handles full_nat_bits optionally in order
>>> to avoid scanning a whole NAT page to set it back as 1 from 0.
>>
>> All 0 value in full_nat_bits means the NAT block has at least one free nid,
>> right? so if we traverse all such NAT blocks, and still haven't collect 
>> enough
>> *target* number of free nids, we don't have to disable nat bit cache, we can
>> just break out here.
> 
> No, I'm seeing this case:
> 1. mount with full=0, empty=0, which indicates some free nids.
> 2. allocate all there-in free nids, but still full=0, empty=0.
> 3. allocate more free nids.
> ...
> ... checkpoint makes full=1, empty=0
> 
> If there is no checkpoint and it consumes all the free nids, we can see all 0
> value in full_nat_bits which is quite impossible. In that case, I'd prefer
> to stop nat_bits and give fsck.f2fs to revive it back.

Yeah, I can understand that, but what I concern is when there is few free nids
(< target), we still try to load nids of 8 NAT blocks until ending the loop of
caching free nids, so it will be very easy to enter the case of disabling
nid_bits cache here, so how about doing more check?

if (i >= nm_i->nat_blocks &&
nm_i->nid_cnt[FREE_NID_LIST] != nm_i->available_nids)
disable_nat_bits

Thanks,

> 
> Thanks,
> 
>>
>> Thanks,
>>
>>>

> + return -EINVAL;
> + }
> +
> + nid = i * NAT_ENTRY_PER_BLOCK;
> + page = get_current_nat_page(sbi, nid);
> + scan_nat_page(sbi, page, nid);
> + f2fs_put_page(page, 1);
> +
> + if (nm_i->nid_cnt[FREE_NID_LIST] < target) {
> + i++;
> + goto check_partial;
> + }
> +out:
> + up_read(_i->nat_tree_lock);
> + return 0;
> +}
> +
> +static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool 
> mount)
>  {
>   struct f2fs_nm_info *nm_i = NM_I(sbi);
>   struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
> @@ -1854,6 +1911,20 @@ static void __build_free_nids(struct f2fs_sb_info 
> *sbi, bool sync)
>   if (!sync && !available_free_memory(sbi, FREE_NIDS))
>   return;
>  
> + /* try to find free nids with nat_bits */
> + if (!mount && !scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST])
> + return;
> +
> + /* find next valid candidate */

 This is just for mount case?
>>>
>>> Yup, it reuses free nids in dirty NAT blocks, so that we can make them as 
>>> full
>>> NAT pages.
>>>
>>> Thanks,
>>>

> + if (is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) {
> + int idx = find_next_zero_bit_le(nm_i->full_nat_bits,
> + nm_i->nat_blocks, 0);
> + if (idx >= nm_i->nat_blocks)
> + set_sbi_flag(sbi, SBI_NEED_FSCK);
> + else
> + nid = idx * NAT_ENTRY_PER_BLOCK;
> + }
> +
>   /* readahead nat pages to be scanned */
>   ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
>   META_NAT, true);
> @@ -1896,10 +1967,10 @@ static void __build_free_nids(struct f2fs_sb_info 
> *sbi, bool sync)
>   nm_i->ra_nid_pages, META_NAT, false);
>  }
>  
> -void build_free_nids(struct f2fs_sb_info *sbi, bool sync)
> +void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
>  {
>   mutex_lock(_I(sbi)->build_lock);
> - __build_free_nids(sbi, 

Re: [f2fs-dev] [PATCH 2/3] f2fs: add bitmaps for empty or full NAT blocks

2017-02-25 Thread Jaegeuk Kim
On 02/25, Chao Yu wrote:
> On 2017/2/24 6:54, Jaegeuk Kim wrote:
> > On 02/23, Chao Yu wrote:
> >> On 2017/2/14 10:06, Jaegeuk Kim wrote:
...
> > 
> >>> +static int scan_nat_bits(struct f2fs_sb_info *sbi)
> >>> +{
> >>> + struct f2fs_nm_info *nm_i = NM_I(sbi);
> >>> + struct page *page;
> >>> + unsigned int i = 0;
> >>> + nid_t target = FREE_NID_PAGES * NAT_ENTRY_PER_BLOCK;
> >>> + nid_t nid;
> >>> +
> >>> + if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
> >>> + return -EAGAIN;
> >>> +
> >>> + down_read(_i->nat_tree_lock);
> >>> +check_empty:
> >>> + i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
> >>> + if (i >= nm_i->nat_blocks) {
> >>> + i = 0;
> >>> + goto check_partial;
> >>> + }
> >>> +
> >>> + for (nid = i * NAT_ENTRY_PER_BLOCK; nid < (i + 1) * NAT_ENTRY_PER_BLOCK;
> >>> + nid++) {
> >>> + if (unlikely(nid >= nm_i->max_nid))
> >>> + break;
> >>> + add_free_nid(sbi, nid, true);
> >>> + }
> >>> +
> >>> + if (nm_i->nid_cnt[FREE_NID_LIST] >= target)
> >>> + goto out;
> >>> + i++;
> >>> + goto check_empty;
> >>> +
> >>> +check_partial:
> >>> + i = find_next_zero_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
> >>> + if (i >= nm_i->nat_blocks) {
> >>> + disable_nat_bits(sbi, true);
> >>
> >> Can this happen in real world? Should be a bug in somewhere?
> > 
> > It happens, since current design handles full_nat_bits optionally in order
> > to avoid scanning a whole NAT page to set it back as 1 from 0.
> 
> All 0 value in full_nat_bits means the NAT block has at least one free nid,
> right? so if we traverse all such NAT blocks, and still haven't collect enough
> *target* number of free nids, we don't have to disable nat bit cache, we can
> just break out here.

No, I'm seeing this case:
1. mount with full=0, empty=0, which indicates some free nids.
2. allocate all there-in free nids, but still full=0, empty=0.
3. allocate more free nids.
...
... checkpoint makes full=1, empty=0

If there is no checkpoint and it consumes all the free nids, we can see all 0
value in full_nat_bits which is quite impossible. In that case, I'd prefer
to stop nat_bits and give fsck.f2fs to revive it back.

Thanks,

> 
> Thanks,
> 
> > 
> >>
> >>> + return -EINVAL;
> >>> + }
> >>> +
> >>> + nid = i * NAT_ENTRY_PER_BLOCK;
> >>> + page = get_current_nat_page(sbi, nid);
> >>> + scan_nat_page(sbi, page, nid);
> >>> + f2fs_put_page(page, 1);
> >>> +
> >>> + if (nm_i->nid_cnt[FREE_NID_LIST] < target) {
> >>> + i++;
> >>> + goto check_partial;
> >>> + }
> >>> +out:
> >>> + up_read(_i->nat_tree_lock);
> >>> + return 0;
> >>> +}
> >>> +
> >>> +static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool 
> >>> mount)
> >>>  {
> >>>   struct f2fs_nm_info *nm_i = NM_I(sbi);
> >>>   struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
> >>> @@ -1854,6 +1911,20 @@ static void __build_free_nids(struct f2fs_sb_info 
> >>> *sbi, bool sync)
> >>>   if (!sync && !available_free_memory(sbi, FREE_NIDS))
> >>>   return;
> >>>  
> >>> + /* try to find free nids with nat_bits */
> >>> + if (!mount && !scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST])
> >>> + return;
> >>> +
> >>> + /* find next valid candidate */
> >>
> >> This is just for mount case?
> > 
> > Yup, it reuses free nids in dirty NAT blocks, so that we can make them as 
> > full
> > NAT pages.
> > 
> > Thanks,
> > 
> >>
> >>> + if (is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) {
> >>> + int idx = find_next_zero_bit_le(nm_i->full_nat_bits,
> >>> + nm_i->nat_blocks, 0);
> >>> + if (idx >= nm_i->nat_blocks)
> >>> + set_sbi_flag(sbi, SBI_NEED_FSCK);
> >>> + else
> >>> + nid = idx * NAT_ENTRY_PER_BLOCK;
> >>> + }
> >>> +
> >>>   /* readahead nat pages to be scanned */
> >>>   ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
> >>>   META_NAT, true);
> >>> @@ -1896,10 +1967,10 @@ static void __build_free_nids(struct f2fs_sb_info 
> >>> *sbi, bool sync)
> >>>   nm_i->ra_nid_pages, META_NAT, false);
> >>>  }
> >>>  
> >>> -void build_free_nids(struct f2fs_sb_info *sbi, bool sync)
> >>> +void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
> >>>  {
> >>>   mutex_lock(_I(sbi)->build_lock);
> >>> - __build_free_nids(sbi, sync);
> >>> + __build_free_nids(sbi, sync, mount);
> >>>   mutex_unlock(_I(sbi)->build_lock);
> >>>  }
> >>>  
> >>> @@ -1941,7 +2012,7 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
> >>>   spin_unlock(_i->nid_list_lock);
> >>>  
> >>>   /* Let's scan nat pages and its caches to get free nids */
> >>> - build_free_nids(sbi, true);
> >>> + build_free_nids(sbi, true, false);
> >>>   goto retry;
> >>>  }
> >>>  
> >>> @@ -2233,8 +2304,39 @@ static void 

Re: [f2fs-dev] [PATCH 2/3] f2fs: add bitmaps for empty or full NAT blocks

2017-02-25 Thread Jaegeuk Kim
On 02/25, Chao Yu wrote:
> On 2017/2/24 6:54, Jaegeuk Kim wrote:
> > On 02/23, Chao Yu wrote:
> >> On 2017/2/14 10:06, Jaegeuk Kim wrote:
...
> > 
> >>> +static int scan_nat_bits(struct f2fs_sb_info *sbi)
> >>> +{
> >>> + struct f2fs_nm_info *nm_i = NM_I(sbi);
> >>> + struct page *page;
> >>> + unsigned int i = 0;
> >>> + nid_t target = FREE_NID_PAGES * NAT_ENTRY_PER_BLOCK;
> >>> + nid_t nid;
> >>> +
> >>> + if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
> >>> + return -EAGAIN;
> >>> +
> >>> + down_read(_i->nat_tree_lock);
> >>> +check_empty:
> >>> + i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
> >>> + if (i >= nm_i->nat_blocks) {
> >>> + i = 0;
> >>> + goto check_partial;
> >>> + }
> >>> +
> >>> + for (nid = i * NAT_ENTRY_PER_BLOCK; nid < (i + 1) * NAT_ENTRY_PER_BLOCK;
> >>> + nid++) {
> >>> + if (unlikely(nid >= nm_i->max_nid))
> >>> + break;
> >>> + add_free_nid(sbi, nid, true);
> >>> + }
> >>> +
> >>> + if (nm_i->nid_cnt[FREE_NID_LIST] >= target)
> >>> + goto out;
> >>> + i++;
> >>> + goto check_empty;
> >>> +
> >>> +check_partial:
> >>> + i = find_next_zero_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
> >>> + if (i >= nm_i->nat_blocks) {
> >>> + disable_nat_bits(sbi, true);
> >>
> >> Can this happen in real world? Should be a bug in somewhere?
> > 
> > It happens, since current design handles full_nat_bits optionally in order
> > to avoid scanning a whole NAT page to set it back as 1 from 0.
> 
> All 0 value in full_nat_bits means the NAT block has at least one free nid,
> right? so if we traverse all such NAT blocks, and still haven't collect enough
> *target* number of free nids, we don't have to disable nat bit cache, we can
> just break out here.

No, I'm seeing this case:
1. mount with full=0, empty=0, which indicates some free nids.
2. allocate all there-in free nids, but still full=0, empty=0.
3. allocate more free nids.
...
... checkpoint makes full=1, empty=0

If there is no checkpoint and it consumes all the free nids, we can see all 0
value in full_nat_bits which is quite impossible. In that case, I'd prefer
to stop nat_bits and give fsck.f2fs to revive it back.

Thanks,

> 
> Thanks,
> 
> > 
> >>
> >>> + return -EINVAL;
> >>> + }
> >>> +
> >>> + nid = i * NAT_ENTRY_PER_BLOCK;
> >>> + page = get_current_nat_page(sbi, nid);
> >>> + scan_nat_page(sbi, page, nid);
> >>> + f2fs_put_page(page, 1);
> >>> +
> >>> + if (nm_i->nid_cnt[FREE_NID_LIST] < target) {
> >>> + i++;
> >>> + goto check_partial;
> >>> + }
> >>> +out:
> >>> + up_read(_i->nat_tree_lock);
> >>> + return 0;
> >>> +}
> >>> +
> >>> +static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool 
> >>> mount)
> >>>  {
> >>>   struct f2fs_nm_info *nm_i = NM_I(sbi);
> >>>   struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
> >>> @@ -1854,6 +1911,20 @@ static void __build_free_nids(struct f2fs_sb_info 
> >>> *sbi, bool sync)
> >>>   if (!sync && !available_free_memory(sbi, FREE_NIDS))
> >>>   return;
> >>>  
> >>> + /* try to find free nids with nat_bits */
> >>> + if (!mount && !scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST])
> >>> + return;
> >>> +
> >>> + /* find next valid candidate */
> >>
> >> This is just for mount case?
> > 
> > Yup, it reuses free nids in dirty NAT blocks, so that we can make them as 
> > full
> > NAT pages.
> > 
> > Thanks,
> > 
> >>
> >>> + if (is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) {
> >>> + int idx = find_next_zero_bit_le(nm_i->full_nat_bits,
> >>> + nm_i->nat_blocks, 0);
> >>> + if (idx >= nm_i->nat_blocks)
> >>> + set_sbi_flag(sbi, SBI_NEED_FSCK);
> >>> + else
> >>> + nid = idx * NAT_ENTRY_PER_BLOCK;
> >>> + }
> >>> +
> >>>   /* readahead nat pages to be scanned */
> >>>   ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
> >>>   META_NAT, true);
> >>> @@ -1896,10 +1967,10 @@ static void __build_free_nids(struct f2fs_sb_info 
> >>> *sbi, bool sync)
> >>>   nm_i->ra_nid_pages, META_NAT, false);
> >>>  }
> >>>  
> >>> -void build_free_nids(struct f2fs_sb_info *sbi, bool sync)
> >>> +void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
> >>>  {
> >>>   mutex_lock(_I(sbi)->build_lock);
> >>> - __build_free_nids(sbi, sync);
> >>> + __build_free_nids(sbi, sync, mount);
> >>>   mutex_unlock(_I(sbi)->build_lock);
> >>>  }
> >>>  
> >>> @@ -1941,7 +2012,7 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
> >>>   spin_unlock(_i->nid_list_lock);
> >>>  
> >>>   /* Let's scan nat pages and its caches to get free nids */
> >>> - build_free_nids(sbi, true);
> >>> + build_free_nids(sbi, true, false);
> >>>   goto retry;
> >>>  }
> >>>  
> >>> @@ -2233,8 +2304,39 @@ static void 

Re: [f2fs-dev] [PATCH 2/3] f2fs: add bitmaps for empty or full NAT blocks

2017-02-24 Thread Chao Yu
On 2017/2/24 6:54, Jaegeuk Kim wrote:
> On 02/23, Chao Yu wrote:
>> On 2017/2/14 10:06, Jaegeuk Kim wrote:
>>> This patches adds bitmaps to represent empty or full NAT blocks containing
>>> free nid entries.
>>>
>>> If we can find valid crc|cp_ver in the last block of checkpoint pack, we'll
>>> use these bitmaps when building free nids. In order to avoid checkpointing
>>> burden, up-to-date bitmaps will be flushed only during umount time. So,
>>> normally we can get this gain, but when power-cut happens, we rely on 
>>> fsck.f2fs
>>> which recovers this bitmap again.
>>>
>>> After this patch, we build free nids from nid #0 at mount time to make more
>>> full NAT blocks, but in runtime, we check empty NAT blocks to load free nids
>>> without loading any NAT pages from disk.
>>>
>>> Signed-off-by: Jaegeuk Kim 
>>> ---
>>>  fs/f2fs/checkpoint.c|  29 +++-
>>>  fs/f2fs/debug.c |   1 +
>>>  fs/f2fs/f2fs.h  |  23 +-
>>>  fs/f2fs/node.c  | 188 
>>> +++-
>>>  fs/f2fs/segment.c   |   2 +-
>>>  include/linux/f2fs_fs.h |   1 +
>>>  6 files changed, 224 insertions(+), 20 deletions(-)
>>>
>>> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
>>> index 042f8d9afe44..783c5c3f16a4 100644
>>> --- a/fs/f2fs/checkpoint.c
>>> +++ b/fs/f2fs/checkpoint.c
>>> @@ -1024,6 +1024,10 @@ static void update_ckpt_flags(struct f2fs_sb_info 
>>> *sbi, struct cp_control *cpc)
>>>  
>>> spin_lock(>cp_lock);
>>>  
>>> +   if (ckpt->cp_pack_total_block_count >
>>> +   sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks)
>>> +   disable_nat_bits(sbi, false);
>>
>> I think we need to drop nat full/empty bitmap only if there is no enough 
>> space
>> in CP area while doing umount, otherwise we can keep this in memory.
> 
> Yup.
> 
>>
>>> +
>>> if (cpc->reason == CP_UMOUNT)
>>> __set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
>>> else
>>> @@ -1136,6 +1140,29 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, 
>>> struct cp_control *cpc)
>>>  
>>> start_blk = __start_cp_next_addr(sbi);
>>>  
>>> +   /* write nat bits */
> 
> ...
> 
>>> +static int scan_nat_bits(struct f2fs_sb_info *sbi)
>>> +{
>>> +   struct f2fs_nm_info *nm_i = NM_I(sbi);
>>> +   struct page *page;
>>> +   unsigned int i = 0;
>>> +   nid_t target = FREE_NID_PAGES * NAT_ENTRY_PER_BLOCK;
>>> +   nid_t nid;
>>> +
>>> +   if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
>>> +   return -EAGAIN;
>>> +
>>> +   down_read(_i->nat_tree_lock);
>>> +check_empty:
>>> +   i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
>>> +   if (i >= nm_i->nat_blocks) {
>>> +   i = 0;
>>> +   goto check_partial;
>>> +   }
>>> +
>>> +   for (nid = i * NAT_ENTRY_PER_BLOCK; nid < (i + 1) * NAT_ENTRY_PER_BLOCK;
>>> +   nid++) {
>>> +   if (unlikely(nid >= nm_i->max_nid))
>>> +   break;
>>> +   add_free_nid(sbi, nid, true);
>>> +   }
>>> +
>>> +   if (nm_i->nid_cnt[FREE_NID_LIST] >= target)
>>> +   goto out;
>>> +   i++;
>>> +   goto check_empty;
>>> +
>>> +check_partial:
>>> +   i = find_next_zero_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
>>> +   if (i >= nm_i->nat_blocks) {
>>> +   disable_nat_bits(sbi, true);
>>
>> Can this happen in real world? Should be a bug in somewhere?
> 
> It happens, since current design handles full_nat_bits optionally in order
> to avoid scanning a whole NAT page to set it back as 1 from 0.

All 0 value in full_nat_bits means the NAT block has at least one free nid,
right? so if we traverse all such NAT blocks, and still haven't collect enough
*target* number of free nids, we don't have to disable nat bit cache, we can
just break out here.

Thanks,

> 
>>
>>> +   return -EINVAL;
>>> +   }
>>> +
>>> +   nid = i * NAT_ENTRY_PER_BLOCK;
>>> +   page = get_current_nat_page(sbi, nid);
>>> +   scan_nat_page(sbi, page, nid);
>>> +   f2fs_put_page(page, 1);
>>> +
>>> +   if (nm_i->nid_cnt[FREE_NID_LIST] < target) {
>>> +   i++;
>>> +   goto check_partial;
>>> +   }
>>> +out:
>>> +   up_read(_i->nat_tree_lock);
>>> +   return 0;
>>> +}
>>> +
>>> +static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool 
>>> mount)
>>>  {
>>> struct f2fs_nm_info *nm_i = NM_I(sbi);
>>> struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
>>> @@ -1854,6 +1911,20 @@ static void __build_free_nids(struct f2fs_sb_info 
>>> *sbi, bool sync)
>>> if (!sync && !available_free_memory(sbi, FREE_NIDS))
>>> return;
>>>  
>>> +   /* try to find free nids with nat_bits */
>>> +   if (!mount && !scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST])
>>> +   return;
>>> +
>>> +   /* find next valid candidate */
>>
>> This is just for mount case?
> 
> Yup, it reuses free nids in dirty NAT blocks, so that we can make them as full
> NAT pages.
> 
> 

Re: [f2fs-dev] [PATCH 2/3] f2fs: add bitmaps for empty or full NAT blocks

2017-02-24 Thread Chao Yu
On 2017/2/24 6:54, Jaegeuk Kim wrote:
> On 02/23, Chao Yu wrote:
>> On 2017/2/14 10:06, Jaegeuk Kim wrote:
>>> This patches adds bitmaps to represent empty or full NAT blocks containing
>>> free nid entries.
>>>
>>> If we can find valid crc|cp_ver in the last block of checkpoint pack, we'll
>>> use these bitmaps when building free nids. In order to avoid checkpointing
>>> burden, up-to-date bitmaps will be flushed only during umount time. So,
>>> normally we can get this gain, but when power-cut happens, we rely on 
>>> fsck.f2fs
>>> which recovers this bitmap again.
>>>
>>> After this patch, we build free nids from nid #0 at mount time to make more
>>> full NAT blocks, but in runtime, we check empty NAT blocks to load free nids
>>> without loading any NAT pages from disk.
>>>
>>> Signed-off-by: Jaegeuk Kim 
>>> ---
>>>  fs/f2fs/checkpoint.c|  29 +++-
>>>  fs/f2fs/debug.c |   1 +
>>>  fs/f2fs/f2fs.h  |  23 +-
>>>  fs/f2fs/node.c  | 188 
>>> +++-
>>>  fs/f2fs/segment.c   |   2 +-
>>>  include/linux/f2fs_fs.h |   1 +
>>>  6 files changed, 224 insertions(+), 20 deletions(-)
>>>
>>> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
>>> index 042f8d9afe44..783c5c3f16a4 100644
>>> --- a/fs/f2fs/checkpoint.c
>>> +++ b/fs/f2fs/checkpoint.c
>>> @@ -1024,6 +1024,10 @@ static void update_ckpt_flags(struct f2fs_sb_info 
>>> *sbi, struct cp_control *cpc)
>>>  
>>> spin_lock(>cp_lock);
>>>  
>>> +   if (ckpt->cp_pack_total_block_count >
>>> +   sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks)
>>> +   disable_nat_bits(sbi, false);
>>
>> I think we need to drop nat full/empty bitmap only if there is no enough 
>> space
>> in CP area while doing umount, otherwise we can keep this in memory.
> 
> Yup.
> 
>>
>>> +
>>> if (cpc->reason == CP_UMOUNT)
>>> __set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
>>> else
>>> @@ -1136,6 +1140,29 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, 
>>> struct cp_control *cpc)
>>>  
>>> start_blk = __start_cp_next_addr(sbi);
>>>  
>>> +   /* write nat bits */
> 
> ...
> 
>>> +static int scan_nat_bits(struct f2fs_sb_info *sbi)
>>> +{
>>> +   struct f2fs_nm_info *nm_i = NM_I(sbi);
>>> +   struct page *page;
>>> +   unsigned int i = 0;
>>> +   nid_t target = FREE_NID_PAGES * NAT_ENTRY_PER_BLOCK;
>>> +   nid_t nid;
>>> +
>>> +   if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
>>> +   return -EAGAIN;
>>> +
>>> +   down_read(_i->nat_tree_lock);
>>> +check_empty:
>>> +   i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
>>> +   if (i >= nm_i->nat_blocks) {
>>> +   i = 0;
>>> +   goto check_partial;
>>> +   }
>>> +
>>> +   for (nid = i * NAT_ENTRY_PER_BLOCK; nid < (i + 1) * NAT_ENTRY_PER_BLOCK;
>>> +   nid++) {
>>> +   if (unlikely(nid >= nm_i->max_nid))
>>> +   break;
>>> +   add_free_nid(sbi, nid, true);
>>> +   }
>>> +
>>> +   if (nm_i->nid_cnt[FREE_NID_LIST] >= target)
>>> +   goto out;
>>> +   i++;
>>> +   goto check_empty;
>>> +
>>> +check_partial:
>>> +   i = find_next_zero_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
>>> +   if (i >= nm_i->nat_blocks) {
>>> +   disable_nat_bits(sbi, true);
>>
>> Can this happen in real world? Should be a bug in somewhere?
> 
> It happens, since current design handles full_nat_bits optionally in order
> to avoid scanning a whole NAT page to set it back as 1 from 0.

All 0 value in full_nat_bits means the NAT block has at least one free nid,
right? so if we traverse all such NAT blocks, and still haven't collect enough
*target* number of free nids, we don't have to disable nat bit cache, we can
just break out here.

Thanks,

> 
>>
>>> +   return -EINVAL;
>>> +   }
>>> +
>>> +   nid = i * NAT_ENTRY_PER_BLOCK;
>>> +   page = get_current_nat_page(sbi, nid);
>>> +   scan_nat_page(sbi, page, nid);
>>> +   f2fs_put_page(page, 1);
>>> +
>>> +   if (nm_i->nid_cnt[FREE_NID_LIST] < target) {
>>> +   i++;
>>> +   goto check_partial;
>>> +   }
>>> +out:
>>> +   up_read(_i->nat_tree_lock);
>>> +   return 0;
>>> +}
>>> +
>>> +static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool 
>>> mount)
>>>  {
>>> struct f2fs_nm_info *nm_i = NM_I(sbi);
>>> struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
>>> @@ -1854,6 +1911,20 @@ static void __build_free_nids(struct f2fs_sb_info 
>>> *sbi, bool sync)
>>> if (!sync && !available_free_memory(sbi, FREE_NIDS))
>>> return;
>>>  
>>> +   /* try to find free nids with nat_bits */
>>> +   if (!mount && !scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST])
>>> +   return;
>>> +
>>> +   /* find next valid candidate */
>>
>> This is just for mount case?
> 
> Yup, it reuses free nids in dirty NAT blocks, so that we can make them as full
> NAT pages.
> 
> Thanks,
> 
>>
>>> +   

Re: [f2fs-dev] [PATCH 2/3] f2fs: add bitmaps for empty or full NAT blocks

2017-02-23 Thread Jaegeuk Kim
On 02/23, Chao Yu wrote:
> On 2017/2/14 10:06, Jaegeuk Kim wrote:
> > This patches adds bitmaps to represent empty or full NAT blocks containing
> > free nid entries.
> > 
> > If we can find valid crc|cp_ver in the last block of checkpoint pack, we'll
> > use these bitmaps when building free nids. In order to avoid checkpointing
> > burden, up-to-date bitmaps will be flushed only during umount time. So,
> > normally we can get this gain, but when power-cut happens, we rely on 
> > fsck.f2fs
> > which recovers this bitmap again.
> > 
> > After this patch, we build free nids from nid #0 at mount time to make more
> > full NAT blocks, but in runtime, we check empty NAT blocks to load free nids
> > without loading any NAT pages from disk.
> > 
> > Signed-off-by: Jaegeuk Kim 
> > ---
> >  fs/f2fs/checkpoint.c|  29 +++-
> >  fs/f2fs/debug.c |   1 +
> >  fs/f2fs/f2fs.h  |  23 +-
> >  fs/f2fs/node.c  | 188 
> > +++-
> >  fs/f2fs/segment.c   |   2 +-
> >  include/linux/f2fs_fs.h |   1 +
> >  6 files changed, 224 insertions(+), 20 deletions(-)
> > 
> > diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> > index 042f8d9afe44..783c5c3f16a4 100644
> > --- a/fs/f2fs/checkpoint.c
> > +++ b/fs/f2fs/checkpoint.c
> > @@ -1024,6 +1024,10 @@ static void update_ckpt_flags(struct f2fs_sb_info 
> > *sbi, struct cp_control *cpc)
> >  
> > spin_lock(>cp_lock);
> >  
> > +   if (ckpt->cp_pack_total_block_count >
> > +   sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks)
> > +   disable_nat_bits(sbi, false);
> 
> I think we need to drop nat full/empty bitmap only if there is no enough space
> in CP area while doing umount, otherwise we can keep this in memory.

Yup.

> 
> > +
> > if (cpc->reason == CP_UMOUNT)
> > __set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
> > else
> > @@ -1136,6 +1140,29 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, 
> > struct cp_control *cpc)
> >  
> > start_blk = __start_cp_next_addr(sbi);
> >  
> > +   /* write nat bits */

...

> > +static int scan_nat_bits(struct f2fs_sb_info *sbi)
> > +{
> > +   struct f2fs_nm_info *nm_i = NM_I(sbi);
> > +   struct page *page;
> > +   unsigned int i = 0;
> > +   nid_t target = FREE_NID_PAGES * NAT_ENTRY_PER_BLOCK;
> > +   nid_t nid;
> > +
> > +   if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
> > +   return -EAGAIN;
> > +
> > +   down_read(_i->nat_tree_lock);
> > +check_empty:
> > +   i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
> > +   if (i >= nm_i->nat_blocks) {
> > +   i = 0;
> > +   goto check_partial;
> > +   }
> > +
> > +   for (nid = i * NAT_ENTRY_PER_BLOCK; nid < (i + 1) * NAT_ENTRY_PER_BLOCK;
> > +   nid++) {
> > +   if (unlikely(nid >= nm_i->max_nid))
> > +   break;
> > +   add_free_nid(sbi, nid, true);
> > +   }
> > +
> > +   if (nm_i->nid_cnt[FREE_NID_LIST] >= target)
> > +   goto out;
> > +   i++;
> > +   goto check_empty;
> > +
> > +check_partial:
> > +   i = find_next_zero_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
> > +   if (i >= nm_i->nat_blocks) {
> > +   disable_nat_bits(sbi, true);
> 
> Can this happen in real world? Should be a bug in somewhere?

It happens, since current design handles full_nat_bits optionally in order
to avoid scanning a whole NAT page to set it back as 1 from 0.

> 
> > +   return -EINVAL;
> > +   }
> > +
> > +   nid = i * NAT_ENTRY_PER_BLOCK;
> > +   page = get_current_nat_page(sbi, nid);
> > +   scan_nat_page(sbi, page, nid);
> > +   f2fs_put_page(page, 1);
> > +
> > +   if (nm_i->nid_cnt[FREE_NID_LIST] < target) {
> > +   i++;
> > +   goto check_partial;
> > +   }
> > +out:
> > +   up_read(_i->nat_tree_lock);
> > +   return 0;
> > +}
> > +
> > +static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool 
> > mount)
> >  {
> > struct f2fs_nm_info *nm_i = NM_I(sbi);
> > struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
> > @@ -1854,6 +1911,20 @@ static void __build_free_nids(struct f2fs_sb_info 
> > *sbi, bool sync)
> > if (!sync && !available_free_memory(sbi, FREE_NIDS))
> > return;
> >  
> > +   /* try to find free nids with nat_bits */
> > +   if (!mount && !scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST])
> > +   return;
> > +
> > +   /* find next valid candidate */
> 
> This is just for mount case?

Yup, it reuses free nids in dirty NAT blocks, so that we can make them as full
NAT pages.

Thanks,

> 
> > +   if (is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) {
> > +   int idx = find_next_zero_bit_le(nm_i->full_nat_bits,
> > +   nm_i->nat_blocks, 0);
> > +   if (idx >= nm_i->nat_blocks)
> > +   set_sbi_flag(sbi, SBI_NEED_FSCK);
> > +   else
> > + 

Re: [f2fs-dev] [PATCH 2/3] f2fs: add bitmaps for empty or full NAT blocks

2017-02-23 Thread Jaegeuk Kim
On 02/23, Chao Yu wrote:
> On 2017/2/14 10:06, Jaegeuk Kim wrote:
> > This patches adds bitmaps to represent empty or full NAT blocks containing
> > free nid entries.
> > 
> > If we can find valid crc|cp_ver in the last block of checkpoint pack, we'll
> > use these bitmaps when building free nids. In order to avoid checkpointing
> > burden, up-to-date bitmaps will be flushed only during umount time. So,
> > normally we can get this gain, but when power-cut happens, we rely on 
> > fsck.f2fs
> > which recovers this bitmap again.
> > 
> > After this patch, we build free nids from nid #0 at mount time to make more
> > full NAT blocks, but in runtime, we check empty NAT blocks to load free nids
> > without loading any NAT pages from disk.
> > 
> > Signed-off-by: Jaegeuk Kim 
> > ---
> >  fs/f2fs/checkpoint.c|  29 +++-
> >  fs/f2fs/debug.c |   1 +
> >  fs/f2fs/f2fs.h  |  23 +-
> >  fs/f2fs/node.c  | 188 
> > +++-
> >  fs/f2fs/segment.c   |   2 +-
> >  include/linux/f2fs_fs.h |   1 +
> >  6 files changed, 224 insertions(+), 20 deletions(-)
> > 
> > diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> > index 042f8d9afe44..783c5c3f16a4 100644
> > --- a/fs/f2fs/checkpoint.c
> > +++ b/fs/f2fs/checkpoint.c
> > @@ -1024,6 +1024,10 @@ static void update_ckpt_flags(struct f2fs_sb_info 
> > *sbi, struct cp_control *cpc)
> >  
> > spin_lock(>cp_lock);
> >  
> > +   if (ckpt->cp_pack_total_block_count >
> > +   sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks)
> > +   disable_nat_bits(sbi, false);
> 
> I think we need to drop nat full/empty bitmap only if there is no enough space
> in CP area while doing umount, otherwise we can keep this in memory.

Yup.

> 
> > +
> > if (cpc->reason == CP_UMOUNT)
> > __set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
> > else
> > @@ -1136,6 +1140,29 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, 
> > struct cp_control *cpc)
> >  
> > start_blk = __start_cp_next_addr(sbi);
> >  
> > +   /* write nat bits */

...

> > +static int scan_nat_bits(struct f2fs_sb_info *sbi)
> > +{
> > +   struct f2fs_nm_info *nm_i = NM_I(sbi);
> > +   struct page *page;
> > +   unsigned int i = 0;
> > +   nid_t target = FREE_NID_PAGES * NAT_ENTRY_PER_BLOCK;
> > +   nid_t nid;
> > +
> > +   if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
> > +   return -EAGAIN;
> > +
> > +   down_read(_i->nat_tree_lock);
> > +check_empty:
> > +   i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
> > +   if (i >= nm_i->nat_blocks) {
> > +   i = 0;
> > +   goto check_partial;
> > +   }
> > +
> > +   for (nid = i * NAT_ENTRY_PER_BLOCK; nid < (i + 1) * NAT_ENTRY_PER_BLOCK;
> > +   nid++) {
> > +   if (unlikely(nid >= nm_i->max_nid))
> > +   break;
> > +   add_free_nid(sbi, nid, true);
> > +   }
> > +
> > +   if (nm_i->nid_cnt[FREE_NID_LIST] >= target)
> > +   goto out;
> > +   i++;
> > +   goto check_empty;
> > +
> > +check_partial:
> > +   i = find_next_zero_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
> > +   if (i >= nm_i->nat_blocks) {
> > +   disable_nat_bits(sbi, true);
> 
> Can this happen in real world? Should be a bug in somewhere?

It happens, since current design handles full_nat_bits optionally in order
to avoid scanning a whole NAT page to set it back as 1 from 0.

> 
> > +   return -EINVAL;
> > +   }
> > +
> > +   nid = i * NAT_ENTRY_PER_BLOCK;
> > +   page = get_current_nat_page(sbi, nid);
> > +   scan_nat_page(sbi, page, nid);
> > +   f2fs_put_page(page, 1);
> > +
> > +   if (nm_i->nid_cnt[FREE_NID_LIST] < target) {
> > +   i++;
> > +   goto check_partial;
> > +   }
> > +out:
> > +   up_read(_i->nat_tree_lock);
> > +   return 0;
> > +}
> > +
> > +static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool 
> > mount)
> >  {
> > struct f2fs_nm_info *nm_i = NM_I(sbi);
> > struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
> > @@ -1854,6 +1911,20 @@ static void __build_free_nids(struct f2fs_sb_info 
> > *sbi, bool sync)
> > if (!sync && !available_free_memory(sbi, FREE_NIDS))
> > return;
> >  
> > +   /* try to find free nids with nat_bits */
> > +   if (!mount && !scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST])
> > +   return;
> > +
> > +   /* find next valid candidate */
> 
> This is just for mount case?

Yup, it reuses free nids in dirty NAT blocks, so that we can make them as full
NAT pages.

Thanks,

> 
> > +   if (is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) {
> > +   int idx = find_next_zero_bit_le(nm_i->full_nat_bits,
> > +   nm_i->nat_blocks, 0);
> > +   if (idx >= nm_i->nat_blocks)
> > +   set_sbi_flag(sbi, SBI_NEED_FSCK);
> > +   else
> > +   nid = idx * 

Re: [f2fs-dev] [PATCH 2/3] f2fs: add bitmaps for empty or full NAT blocks

2017-02-23 Thread Chao Yu
On 2017/2/14 10:06, Jaegeuk Kim wrote:
> This patches adds bitmaps to represent empty or full NAT blocks containing
> free nid entries.
> 
> If we can find valid crc|cp_ver in the last block of checkpoint pack, we'll
> use these bitmaps when building free nids. In order to avoid checkpointing
> burden, up-to-date bitmaps will be flushed only during umount time. So,
> normally we can get this gain, but when power-cut happens, we rely on 
> fsck.f2fs
> which recovers this bitmap again.
> 
> After this patch, we build free nids from nid #0 at mount time to make more
> full NAT blocks, but in runtime, we check empty NAT blocks to load free nids
> without loading any NAT pages from disk.
> 
> Signed-off-by: Jaegeuk Kim 
> ---
>  fs/f2fs/checkpoint.c|  29 +++-
>  fs/f2fs/debug.c |   1 +
>  fs/f2fs/f2fs.h  |  23 +-
>  fs/f2fs/node.c  | 188 
> +++-
>  fs/f2fs/segment.c   |   2 +-
>  include/linux/f2fs_fs.h |   1 +
>  6 files changed, 224 insertions(+), 20 deletions(-)
> 
> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> index 042f8d9afe44..783c5c3f16a4 100644
> --- a/fs/f2fs/checkpoint.c
> +++ b/fs/f2fs/checkpoint.c
> @@ -1024,6 +1024,10 @@ static void update_ckpt_flags(struct f2fs_sb_info 
> *sbi, struct cp_control *cpc)
>  
>   spin_lock(>cp_lock);
>  
> + if (ckpt->cp_pack_total_block_count >
> + sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks)
> + disable_nat_bits(sbi, false);

I think we need to drop nat full/empty bitmap only if there is no enough space
in CP area while doing umount, otherwise we can keep this in memory.

> +
>   if (cpc->reason == CP_UMOUNT)
>   __set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
>   else
> @@ -1136,6 +1140,29 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, 
> struct cp_control *cpc)
>  
>   start_blk = __start_cp_next_addr(sbi);
>  
> + /* write nat bits */
> + if (cpc->reason == CP_UMOUNT &&
> + is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) {
> + __u64 cp_ver = le64_to_cpu(ckpt->checkpoint_ver);

__u64 cp_ver = cur_cp_version(ckpt);

> + unsigned int i;
> + block_t blk;
> +
> + cp_ver |= ((__u64)crc32 << 32);
> + *(__le64 *)nm_i->nat_bits = cpu_to_le64(cp_ver);
> +
> + blk = start_blk + sbi->blocks_per_seg - nm_i->nat_bits_blocks;
> + for (i = 0; i < nm_i->nat_bits_blocks; i++)
> + update_meta_page(sbi, nm_i->nat_bits +
> + (i << F2FS_BLKSIZE_BITS), blk + i);
> +
> + /* Flush all the NAT BITS pages */
> + while (get_pages(sbi, F2FS_DIRTY_META)) {
> + sync_meta_pages(sbi, META, LONG_MAX);
> + if (unlikely(f2fs_cp_error(sbi)))
> + return -EIO;
> + }
> + }
> +
>   /* need to wait for end_io results */
>   wait_on_all_pages_writeback(sbi);
>   if (unlikely(f2fs_cp_error(sbi)))
> @@ -1272,7 +1299,7 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct 
> cp_control *cpc)
>   ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver);
>  
>   /* write cached NAT/SIT entries to NAT/SIT area */
> - flush_nat_entries(sbi);
> + flush_nat_entries(sbi, cpc);
>   flush_sit_entries(sbi, cpc);
>  
>   /* unlock all the fs_lock[] in do_checkpoint() */
> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
> index de8da9fc5c99..015ad2b73a92 100644
> --- a/fs/f2fs/debug.c
> +++ b/fs/f2fs/debug.c
> @@ -193,6 +193,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
>   /* build nm */
>   si->base_mem += sizeof(struct f2fs_nm_info);
>   si->base_mem += __bitmap_size(sbi, NAT_BITMAP);
> + si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS);
>  
>  get_cache:
>   si->cache_mem = 0;
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 5c87d763a347..d263dade5e4c 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -547,6 +547,7 @@ struct f2fs_nm_info {
>   struct list_head nat_entries;   /* cached nat entry list (clean) */
>   unsigned int nat_cnt;   /* the # of cached nat entries */
>   unsigned int dirty_nat_cnt; /* total num of nat entries in set */
> + unsigned int nat_blocks;/* # of nat blocks */
>  
>   /* free node ids management */
>   struct radix_tree_root free_nid_root;/* root of the free_nid cache */
> @@ -557,6 +558,11 @@ struct f2fs_nm_info {
>  
>   /* for checkpoint */
>   char *nat_bitmap;   /* NAT bitmap pointer */
> +
> + unsigned int nat_bits_blocks;   /* # of nat bits blocks */
> + unsigned char *nat_bits;/* NAT bits blocks */
> + unsigned char *full_nat_bits;   /* full NAT pages */
> + unsigned char *empty_nat_bits;  /* empty NAT pages */
>  #ifdef CONFIG_F2FS_CHECK_FS
> 

Re: [f2fs-dev] [PATCH 2/3] f2fs: add bitmaps for empty or full NAT blocks

2017-02-23 Thread Chao Yu
On 2017/2/14 10:06, Jaegeuk Kim wrote:
> This patches adds bitmaps to represent empty or full NAT blocks containing
> free nid entries.
> 
> If we can find valid crc|cp_ver in the last block of checkpoint pack, we'll
> use these bitmaps when building free nids. In order to avoid checkpointing
> burden, up-to-date bitmaps will be flushed only during umount time. So,
> normally we can get this gain, but when power-cut happens, we rely on 
> fsck.f2fs
> which recovers this bitmap again.
> 
> After this patch, we build free nids from nid #0 at mount time to make more
> full NAT blocks, but in runtime, we check empty NAT blocks to load free nids
> without loading any NAT pages from disk.
> 
> Signed-off-by: Jaegeuk Kim 
> ---
>  fs/f2fs/checkpoint.c|  29 +++-
>  fs/f2fs/debug.c |   1 +
>  fs/f2fs/f2fs.h  |  23 +-
>  fs/f2fs/node.c  | 188 
> +++-
>  fs/f2fs/segment.c   |   2 +-
>  include/linux/f2fs_fs.h |   1 +
>  6 files changed, 224 insertions(+), 20 deletions(-)
> 
> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> index 042f8d9afe44..783c5c3f16a4 100644
> --- a/fs/f2fs/checkpoint.c
> +++ b/fs/f2fs/checkpoint.c
> @@ -1024,6 +1024,10 @@ static void update_ckpt_flags(struct f2fs_sb_info 
> *sbi, struct cp_control *cpc)
>  
>   spin_lock(>cp_lock);
>  
> + if (ckpt->cp_pack_total_block_count >
> + sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks)
> + disable_nat_bits(sbi, false);

I think we need to drop nat full/empty bitmap only if there is no enough space
in CP area while doing umount, otherwise we can keep this in memory.

> +
>   if (cpc->reason == CP_UMOUNT)
>   __set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
>   else
> @@ -1136,6 +1140,29 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, 
> struct cp_control *cpc)
>  
>   start_blk = __start_cp_next_addr(sbi);
>  
> + /* write nat bits */
> + if (cpc->reason == CP_UMOUNT &&
> + is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) {
> + __u64 cp_ver = le64_to_cpu(ckpt->checkpoint_ver);

__u64 cp_ver = cur_cp_version(ckpt);

> + unsigned int i;
> + block_t blk;
> +
> + cp_ver |= ((__u64)crc32 << 32);
> + *(__le64 *)nm_i->nat_bits = cpu_to_le64(cp_ver);
> +
> + blk = start_blk + sbi->blocks_per_seg - nm_i->nat_bits_blocks;
> + for (i = 0; i < nm_i->nat_bits_blocks; i++)
> + update_meta_page(sbi, nm_i->nat_bits +
> + (i << F2FS_BLKSIZE_BITS), blk + i);
> +
> + /* Flush all the NAT BITS pages */
> + while (get_pages(sbi, F2FS_DIRTY_META)) {
> + sync_meta_pages(sbi, META, LONG_MAX);
> + if (unlikely(f2fs_cp_error(sbi)))
> + return -EIO;
> + }
> + }
> +
>   /* need to wait for end_io results */
>   wait_on_all_pages_writeback(sbi);
>   if (unlikely(f2fs_cp_error(sbi)))
> @@ -1272,7 +1299,7 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct 
> cp_control *cpc)
>   ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver);
>  
>   /* write cached NAT/SIT entries to NAT/SIT area */
> - flush_nat_entries(sbi);
> + flush_nat_entries(sbi, cpc);
>   flush_sit_entries(sbi, cpc);
>  
>   /* unlock all the fs_lock[] in do_checkpoint() */
> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
> index de8da9fc5c99..015ad2b73a92 100644
> --- a/fs/f2fs/debug.c
> +++ b/fs/f2fs/debug.c
> @@ -193,6 +193,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
>   /* build nm */
>   si->base_mem += sizeof(struct f2fs_nm_info);
>   si->base_mem += __bitmap_size(sbi, NAT_BITMAP);
> + si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS);
>  
>  get_cache:
>   si->cache_mem = 0;
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 5c87d763a347..d263dade5e4c 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -547,6 +547,7 @@ struct f2fs_nm_info {
>   struct list_head nat_entries;   /* cached nat entry list (clean) */
>   unsigned int nat_cnt;   /* the # of cached nat entries */
>   unsigned int dirty_nat_cnt; /* total num of nat entries in set */
> + unsigned int nat_blocks;/* # of nat blocks */
>  
>   /* free node ids management */
>   struct radix_tree_root free_nid_root;/* root of the free_nid cache */
> @@ -557,6 +558,11 @@ struct f2fs_nm_info {
>  
>   /* for checkpoint */
>   char *nat_bitmap;   /* NAT bitmap pointer */
> +
> + unsigned int nat_bits_blocks;   /* # of nat bits blocks */
> + unsigned char *nat_bits;/* NAT bits blocks */
> + unsigned char *full_nat_bits;   /* full NAT pages */
> + unsigned char *empty_nat_bits;  /* empty NAT pages */
>  #ifdef CONFIG_F2FS_CHECK_FS
>   char