Re: [Ocfs2-devel] [PATCH] ocfs2: check the metadate alloc before marking extent written

Joseph Qi Mon, 04 Dec 2017 19:17:55 -0800


On 17/12/5 11:09, alex chen wrote:
> 
> 
> On 2017/12/5 10:45, Joseph Qi wrote:
>>
>>
>> On 17/12/5 10:36, alex chen wrote:
>>> Hi Joseph,
>>>
>>> Thanks for your reviews.
>>>
>>> On 2017/12/5 10:21, Joseph Qi wrote:
>>>>
>>>>
>>>> On 17/12/5 09:41, alex chen wrote:
>>>>> We need to check the free number of the records in each loop to mark
>>>>> extent written, because the last extent block may be changed through
>>>>> many times marking extent written and the 'num_free_extents' also be
>>>>> changed. In the worst case, the 'num_free_extents' may become less
>>>>> than the beginning of the loop. So we should not estimate the free
>>>>> number of the records at the beginning of the loop to mark extent
>>>>> written.
>>>>>
>>>>> Reported-by: John Lightsey <j...@nixnuts.net>
>>>>> Signed-off-by: Alex Chen <alex.c...@huawei.com>
>>>>> Reviewed-by: Jun Piao <piao...@huawei.com>
>>>>> ---
>>>>>  fs/ocfs2/aops.c | 76 
>>>>> ++++++++++++++++++++++++++++++++++++++++++++++++---------
>>>>>  1 file changed, 64 insertions(+), 12 deletions(-)
>>>>>
>>>>> diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
>>>>> index d151632..702aebc 100644
>>>>> --- a/fs/ocfs2/aops.c
>>>>> +++ b/fs/ocfs2/aops.c
>>>>> @@ -2272,6 +2272,35 @@ static int ocfs2_dio_wr_get_block(struct inode 
>>>>> *inode, sector_t iblock,
>>>>>   return ret;
>>>>>  }
>>>>>
>>>>> +static int ocfs2_dio_should_restart(struct ocfs2_extent_tree *et,
>>>>> +         struct ocfs2_alloc_context *meta_ac, int max_rec_needed)
>>>>> +{
>>>>> + int status = 0, free_extents;
>>>>> +
>>>>> + free_extents = ocfs2_num_free_extents(et);
>>>>> + if (free_extents < 0) {
>>>>> +         status = free_extents;
>>>>> +         mlog_errno(status);
>>>>> +         return status;
>>>>> + }
>>>>> +
>>>>> + /*
>>>>> +  * there are two cases which could cause us to EAGAIN in the
>>>>> +  * we-need-more-metadata case:
>>>>> +  * 1) we haven't reserved *any*
>>>>> +  * 2) we are so fragmented, we've needed to add metadata too
>>>>> +  *    many times.
>>>>> +  */
>>>>> + if (free_extents < max_rec_needed) {
>>>>> +         if (!meta_ac || (ocfs2_alloc_context_bits_left(meta_ac)
>>>>> +                         < ocfs2_extend_meta_needed(et->et_root_el)))
>>>>> +                 status = 1;
>>>>> + }
>>>>> +
>>>>> + return status;
>>>>> +}
>>>>> +
>>>>> +#define OCFS2_MAX_REC_NEEDED_SPLIT 2
>>>>>  static int ocfs2_dio_end_io_write(struct inode *inode,
>>>>>                             struct ocfs2_dio_write_ctxt *dwc,
>>>>>                             loff_t offset,
>>>>> @@ -2281,14 +2310,13 @@ static int ocfs2_dio_end_io_write(struct inode 
>>>>> *inode,
>>>>>   struct ocfs2_extent_tree et;
>>>>>   struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
>>>>>   struct ocfs2_inode_info *oi = OCFS2_I(inode);
>>>>> - struct ocfs2_unwritten_extent *ue = NULL;
>>>>> + struct ocfs2_unwritten_extent *ue = NULL, *tmp_ue;
>>>>>   struct buffer_head *di_bh = NULL;
>>>>>   struct ocfs2_dinode *di;
>>>>> - struct ocfs2_alloc_context *data_ac = NULL;
>>>>>   struct ocfs2_alloc_context *meta_ac = NULL;
>>>>>   handle_t *handle = NULL;
>>>>>   loff_t end = offset + bytes;
>>>>> - int ret = 0, credits = 0, locked = 0;
>>>>> + int ret = 0, credits = 0, locked = 0, restart = 0;
>>>>>
>>>>>   ocfs2_init_dealloc_ctxt(&dealloc);
>>>>>
>>>>> @@ -2330,8 +2358,9 @@ static int ocfs2_dio_end_io_write(struct inode 
>>>>> *inode,
>>>>>
>>>> Should we restart here?
>>>
>>> OK. we should restart before initialized the inode extent tree.
>>>
>>>>
>>>>>   ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
>>>>>
>>>>> - ret = ocfs2_lock_allocators(inode, &et, 0, dwc->dw_zero_count*2,
>>>>> -                             &data_ac, &meta_ac);
>>>>> +restart_all:
>>>>> + ret = ocfs2_lock_allocators(inode, &et, 0, OCFS2_MAX_REC_NEEDED_SPLIT,
>>>>> +                             NULL, &meta_ac);
>>>>>   if (ret) {
>>>>>           mlog_errno(ret);
>>>>>           goto unlock;
>>>>> @@ -2343,7 +2372,7 @@ static int ocfs2_dio_end_io_write(struct inode 
>>>>> *inode,
>>>>>   if (IS_ERR(handle)) {
>>>>>           ret = PTR_ERR(handle);
>>>>>           mlog_errno(ret);
>>>>> -         goto unlock;
>>>>> +         goto free_ac;
>>>> I don't think it is a necessary change here.
>>>
>>> we need to free the 'meta_ac' which allocated in ocfs2_lock_allocators() 
>>> when we
>>> start transaction failed.
>>>
>> As I described in the last mail, just move the restart check after the
>> original free meta_ac check.
>>
>>> Thanks,
>>> Alex
>>>>
>>>>>   }
>>>>>   ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
>>>>>                                 OCFS2_JOURNAL_ACCESS_WRITE);
>>>>> @@ -2352,7 +2381,17 @@ static int ocfs2_dio_end_io_write(struct inode 
>>>>> *inode,
>>>>>           goto commit;
>>>>>   }
>>>>>
>>>>> - list_for_each_entry(ue, &dwc->dw_zero_list, ue_node) {
>>>>> + list_for_each_entry_safe(ue, tmp_ue, &dwc->dw_zero_list, ue_node) {
>>>>> +         ret = ocfs2_dio_should_restart(&et, meta_ac,
>>>>> +                         OCFS2_MAX_REC_NEEDED_SPLIT * 2);
>>>>> +         if (ret < 0) {
>>>>> +                 mlog_errno(ret);
>>>>> +                 break;
>>>>> +         } else if (ret == 1) {
>>>>> +                 restart = 1;
>>>>> +                 break;
>>>>> +         }
>>>>> +
>>>>>           ret = ocfs2_mark_extent_written(inode, &et, handle,
>>>>>                                           ue->ue_cpos, 1,
>>>>>                                           ue->ue_phys,
>>>>> @@ -2361,24 +2400,37 @@ static int ocfs2_dio_end_io_write(struct inode 
>>>>> *inode,
>>>>>                   mlog_errno(ret);
>>>>>                   break;
>>>>>           }
>>>>> +
>>>>> +         dwc->dw_zero_count--;
>>>>> +         list_del_init(&ue->ue_node);
>>>>> +         spin_lock(&oi->ip_lock);
>>>>> +         list_del_init(&ue->ue_ip_node);
>>>>> +         spin_unlock(&oi->ip_lock);
>>>>> +         kfree(ue);
>>>>>   }
>>>>>
>>>>> - if (end > i_size_read(inode)) {
>>>>> + if (!restart && end > i_size_read(inode)) {
>>>>>           ret = ocfs2_set_inode_size(handle, inode, di_bh, end);
>>>>>           if (ret < 0)
>>>>>                   mlog_errno(ret);
>>>>>   }
>>>>> +
>>>>>  commit:
>>>>>   ocfs2_commit_trans(osb, handle);
>>>>> +free_ac:
>>>>> + if (meta_ac) {
>>>>> +         ocfs2_free_alloc_context(meta_ac);
>>>>> +         meta_ac = NULL;
>>>>> + }
>>>>> + if (restart) {
>>>>> +         restart = 0;
>>>>> +         goto restart_all;
>>>>> + }
>>>>>  unlock:
>>>>>   up_write(&oi->ip_alloc_sem);
>>>>>   ocfs2_inode_unlock(inode, 1);
>>>>>   brelse(di_bh);
>>>>>  out:
>>>>> - if (data_ac)
>>>>> -         ocfs2_free_alloc_context(data_ac);
>>>> Agree to cleanup the unused data_ac.
>>>>
>>>>> - if (meta_ac)
>>>>> -         ocfs2_free_alloc_context(meta_ac);
>> Just move the restart logic down is enough.
> 
> If we check the restart here, we will unlock the 'ip_alloc_sem' and release 
> the
> inode buffer, which we don't need to do.
> 
Oh, my mistake.
You are right, we should restart under inode lock as well as
ip_alloc_sem.


> Thanks,
> Alex
>>>>
>>>> Thanks,
>>>> Joseph
>>>>
>>>>>   ocfs2_run_deallocs(osb, &dealloc);
>>>>>   if (locked)
>>>>>           inode_unlock(inode);
>>>>>
>>>>
>>>> .
>>>>
>>>
>>
>> .
>>
> 

_______________________________________________
Ocfs2-devel mailing list
Ocfs2-devel@oss.oracle.com
https://oss.oracle.com/mailman/listinfo/ocfs2-devel

Re: [Ocfs2-devel] [PATCH] ocfs2: check the metadate alloc before marking extent written

Reply via email to