Re: [Ocfs2-devel] [PATCH] ocfs2: check the metadate alloc before marking extent written

alex chen Mon, 04 Dec 2017 19:11:51 -0800


On 2017/12/5 10:45, Joseph Qi wrote:
> 
> 
> On 17/12/5 10:36, alex chen wrote:
>> Hi Joseph,
>>
>> Thanks for your reviews.
>>
>> On 2017/12/5 10:21, Joseph Qi wrote:
>>>
>>>
>>> On 17/12/5 09:41, alex chen wrote:
>>>> We need to check the free number of the records in each loop to mark
>>>> extent written, because the last extent block may be changed through
>>>> many times marking extent written and the 'num_free_extents' also be
>>>> changed. In the worst case, the 'num_free_extents' may become less
>>>> than the beginning of the loop. So we should not estimate the free
>>>> number of the records at the beginning of the loop to mark extent
>>>> written.
>>>>
>>>> Reported-by: John Lightsey <j...@nixnuts.net>
>>>> Signed-off-by: Alex Chen <alex.c...@huawei.com>
>>>> Reviewed-by: Jun Piao <piao...@huawei.com>
>>>> ---
>>>>  fs/ocfs2/aops.c | 76 
>>>> ++++++++++++++++++++++++++++++++++++++++++++++++---------
>>>>  1 file changed, 64 insertions(+), 12 deletions(-)
>>>>
>>>> diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
>>>> index d151632..702aebc 100644
>>>> --- a/fs/ocfs2/aops.c
>>>> +++ b/fs/ocfs2/aops.c
>>>> @@ -2272,6 +2272,35 @@ static int ocfs2_dio_wr_get_block(struct inode 
>>>> *inode, sector_t iblock,
>>>>    return ret;
>>>>  }
>>>>
>>>> +static int ocfs2_dio_should_restart(struct ocfs2_extent_tree *et,
>>>> +          struct ocfs2_alloc_context *meta_ac, int max_rec_needed)
>>>> +{
>>>> +  int status = 0, free_extents;
>>>> +
>>>> +  free_extents = ocfs2_num_free_extents(et);
>>>> +  if (free_extents < 0) {
>>>> +          status = free_extents;
>>>> +          mlog_errno(status);
>>>> +          return status;
>>>> +  }
>>>> +
>>>> +  /*
>>>> +   * there are two cases which could cause us to EAGAIN in the
>>>> +   * we-need-more-metadata case:
>>>> +   * 1) we haven't reserved *any*
>>>> +   * 2) we are so fragmented, we've needed to add metadata too
>>>> +   *    many times.
>>>> +   */
>>>> +  if (free_extents < max_rec_needed) {
>>>> +          if (!meta_ac || (ocfs2_alloc_context_bits_left(meta_ac)
>>>> +                          < ocfs2_extend_meta_needed(et->et_root_el)))
>>>> +                  status = 1;
>>>> +  }
>>>> +
>>>> +  return status;
>>>> +}
>>>> +
>>>> +#define OCFS2_MAX_REC_NEEDED_SPLIT 2
>>>>  static int ocfs2_dio_end_io_write(struct inode *inode,
>>>>                              struct ocfs2_dio_write_ctxt *dwc,
>>>>                              loff_t offset,
>>>> @@ -2281,14 +2310,13 @@ static int ocfs2_dio_end_io_write(struct inode 
>>>> *inode,
>>>>    struct ocfs2_extent_tree et;
>>>>    struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
>>>>    struct ocfs2_inode_info *oi = OCFS2_I(inode);
>>>> -  struct ocfs2_unwritten_extent *ue = NULL;
>>>> +  struct ocfs2_unwritten_extent *ue = NULL, *tmp_ue;
>>>>    struct buffer_head *di_bh = NULL;
>>>>    struct ocfs2_dinode *di;
>>>> -  struct ocfs2_alloc_context *data_ac = NULL;
>>>>    struct ocfs2_alloc_context *meta_ac = NULL;
>>>>    handle_t *handle = NULL;
>>>>    loff_t end = offset + bytes;
>>>> -  int ret = 0, credits = 0, locked = 0;
>>>> +  int ret = 0, credits = 0, locked = 0, restart = 0;
>>>>
>>>>    ocfs2_init_dealloc_ctxt(&dealloc);
>>>>
>>>> @@ -2330,8 +2358,9 @@ static int ocfs2_dio_end_io_write(struct inode 
>>>> *inode,
>>>>
>>> Should we restart here?
>>
>> OK. we should restart before initialized the inode extent tree.
>>
>>>
>>>>    ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
>>>>
>>>> -  ret = ocfs2_lock_allocators(inode, &et, 0, dwc->dw_zero_count*2,
>>>> -                              &data_ac, &meta_ac);
>>>> +restart_all:
>>>> +  ret = ocfs2_lock_allocators(inode, &et, 0, OCFS2_MAX_REC_NEEDED_SPLIT,
>>>> +                              NULL, &meta_ac);
>>>>    if (ret) {
>>>>            mlog_errno(ret);
>>>>            goto unlock;
>>>> @@ -2343,7 +2372,7 @@ static int ocfs2_dio_end_io_write(struct inode 
>>>> *inode,
>>>>    if (IS_ERR(handle)) {
>>>>            ret = PTR_ERR(handle);
>>>>            mlog_errno(ret);
>>>> -          goto unlock;
>>>> +          goto free_ac;
>>> I don't think it is a necessary change here.
>>
>> we need to free the 'meta_ac' which allocated in ocfs2_lock_allocators() 
>> when we
>> start transaction failed.
>>
> As I described in the last mail, just move the restart check after the
> original free meta_ac check.
> 
>> Thanks,
>> Alex
>>>
>>>>    }
>>>>    ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
>>>>                                  OCFS2_JOURNAL_ACCESS_WRITE);
>>>> @@ -2352,7 +2381,17 @@ static int ocfs2_dio_end_io_write(struct inode 
>>>> *inode,
>>>>            goto commit;
>>>>    }
>>>>
>>>> -  list_for_each_entry(ue, &dwc->dw_zero_list, ue_node) {
>>>> +  list_for_each_entry_safe(ue, tmp_ue, &dwc->dw_zero_list, ue_node) {
>>>> +          ret = ocfs2_dio_should_restart(&et, meta_ac,
>>>> +                          OCFS2_MAX_REC_NEEDED_SPLIT * 2);
>>>> +          if (ret < 0) {
>>>> +                  mlog_errno(ret);
>>>> +                  break;
>>>> +          } else if (ret == 1) {
>>>> +                  restart = 1;
>>>> +                  break;
>>>> +          }
>>>> +
>>>>            ret = ocfs2_mark_extent_written(inode, &et, handle,
>>>>                                            ue->ue_cpos, 1,
>>>>                                            ue->ue_phys,
>>>> @@ -2361,24 +2400,37 @@ static int ocfs2_dio_end_io_write(struct inode 
>>>> *inode,
>>>>                    mlog_errno(ret);
>>>>                    break;
>>>>            }
>>>> +
>>>> +          dwc->dw_zero_count--;
>>>> +          list_del_init(&ue->ue_node);
>>>> +          spin_lock(&oi->ip_lock);
>>>> +          list_del_init(&ue->ue_ip_node);
>>>> +          spin_unlock(&oi->ip_lock);
>>>> +          kfree(ue);
>>>>    }
>>>>
>>>> -  if (end > i_size_read(inode)) {
>>>> +  if (!restart && end > i_size_read(inode)) {
>>>>            ret = ocfs2_set_inode_size(handle, inode, di_bh, end);
>>>>            if (ret < 0)
>>>>                    mlog_errno(ret);
>>>>    }
>>>> +
>>>>  commit:
>>>>    ocfs2_commit_trans(osb, handle);
>>>> +free_ac:
>>>> +  if (meta_ac) {
>>>> +          ocfs2_free_alloc_context(meta_ac);
>>>> +          meta_ac = NULL;
>>>> +  }
>>>> +  if (restart) {
>>>> +          restart = 0;
>>>> +          goto restart_all;
>>>> +  }
>>>>  unlock:
>>>>    up_write(&oi->ip_alloc_sem);
>>>>    ocfs2_inode_unlock(inode, 1);
>>>>    brelse(di_bh);
>>>>  out:
>>>> -  if (data_ac)
>>>> -          ocfs2_free_alloc_context(data_ac);
>>> Agree to cleanup the unused data_ac.
>>>
>>>> -  if (meta_ac)
>>>> -          ocfs2_free_alloc_context(meta_ac);
> Just move the restart logic down is enough.


If we check the restart here, we will unlock the 'ip_alloc_sem' and release the
inode buffer, which we don't need to do.

Thanks,
Alex
>>>
>>> Thanks,
>>> Joseph
>>>
>>>>    ocfs2_run_deallocs(osb, &dealloc);
>>>>    if (locked)
>>>>            inode_unlock(inode);
>>>>
>>>
>>> .
>>>
>>
> 
> .
> 


_______________________________________________
Ocfs2-devel mailing list
Ocfs2-devel@oss.oracle.com
https://oss.oracle.com/mailman/listinfo/ocfs2-devel

Re: [Ocfs2-devel] [PATCH] ocfs2: check the metadate alloc before marking extent written

Reply via email to