Re: [Ocfs2-devel] [PATCH] ocfs2/cluster: neaten a member of o2net_msg_handler

2017-12-04 Thread alex chen
Hi Changwei,

Thank you for your patch.

On 2017/12/5 13:47, Changwei Ge wrote:
> It's odd that o2net_msg_handler::nh_func_data is declared as type
> o2net_msg_handler_func*.
> So neaten it.
> 
> Signed-off-by: Changwei Ge 
Reviewed-by: Alex Chen 
> ---
>   fs/ocfs2/cluster/tcp_internal.h | 2 +-
>   1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/fs/ocfs2/cluster/tcp_internal.h 
> b/fs/ocfs2/cluster/tcp_internal.h
> index b95e7df5b76a..0276f7f8d5e6 100644
> --- a/fs/ocfs2/cluster/tcp_internal.h
> +++ b/fs/ocfs2/cluster/tcp_internal.h
> @@ -196,7 +196,7 @@ struct o2net_msg_handler {
>   u32 nh_msg_type;
>   u32 nh_key;
>   o2net_msg_handler_func  *nh_func;
> - o2net_msg_handler_func  *nh_func_data;
> + void*nh_func_data;
>   o2net_post_msg_handler_func
>   *nh_post_func;
>   struct kref nh_kref;
> 


___
Ocfs2-devel mailing list
Ocfs2-devel@oss.oracle.com
https://oss.oracle.com/mailman/listinfo/ocfs2-devel


[Ocfs2-devel] [PATCH] ocfs2/cluster: neaten a member of o2net_msg_handler

2017-12-04 Thread Changwei Ge
It's odd that o2net_msg_handler::nh_func_data is declared as type
o2net_msg_handler_func*.
So neaten it.

Signed-off-by: Changwei Ge 
---
  fs/ocfs2/cluster/tcp_internal.h | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/ocfs2/cluster/tcp_internal.h 
b/fs/ocfs2/cluster/tcp_internal.h
index b95e7df5b76a..0276f7f8d5e6 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -196,7 +196,7 @@ struct o2net_msg_handler {
u32 nh_msg_type;
u32 nh_key;
o2net_msg_handler_func  *nh_func;
-   o2net_msg_handler_func  *nh_func_data;
+   void*nh_func_data;
o2net_post_msg_handler_func
*nh_post_func;
struct kref nh_kref;
-- 
2.11.0

___
Ocfs2-devel mailing list
Ocfs2-devel@oss.oracle.com
https://oss.oracle.com/mailman/listinfo/ocfs2-devel


Re: [Ocfs2-devel] [PATCH v2] ocfs2: check the metadate alloc before marking extent written

2017-12-04 Thread Joseph Qi


On 17/12/5 11:29, alex chen wrote:
> We need to check the free number of the records in each loop to mark
> extent written, because the last extent block may be changed through
> many times marking extent written and the 'num_free_extents' also be
> changed. In the worst case, the 'num_free_extents' may become less
> than the beginning of the loop. So we should not estimate the free
> number of the records at the beginning of the loop to mark extent
> written.
> 
> Reported-by: John Lightsey 
> Signed-off-by: Alex Chen 
> Reviewed-by: Jun Piao 
Reviewed-by: Joseph Qi 

> ---
>  fs/ocfs2/aops.c | 77 
> +++--
>  1 file changed, 64 insertions(+), 13 deletions(-)
> 
> diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
> index d151632..7e1659d 100644
> --- a/fs/ocfs2/aops.c
> +++ b/fs/ocfs2/aops.c
> @@ -2272,6 +2272,35 @@ static int ocfs2_dio_wr_get_block(struct inode *inode, 
> sector_t iblock,
>   return ret;
>  }
> 
> +static int ocfs2_dio_should_restart(struct ocfs2_extent_tree *et,
> + struct ocfs2_alloc_context *meta_ac, int max_rec_needed)
> +{
> + int status = 0, free_extents;
> +
> + free_extents = ocfs2_num_free_extents(et);
> + if (free_extents < 0) {
> + status = free_extents;
> + mlog_errno(status);
> + return status;
> + }
> +
> + /*
> +  * there are two cases which could cause us to EAGAIN in the
> +  * we-need-more-metadata case:
> +  * 1) we haven't reserved *any*
> +  * 2) we are so fragmented, we've needed to add metadata too
> +  *many times.
> +  */
> + if (free_extents < max_rec_needed) {
> + if (!meta_ac || (ocfs2_alloc_context_bits_left(meta_ac)
> + < ocfs2_extend_meta_needed(et->et_root_el)))
> + status = 1;
> + }
> +
> + return status;
> +}
> +
> +#define OCFS2_MAX_REC_NEEDED_SPLIT 2
>  static int ocfs2_dio_end_io_write(struct inode *inode,
> struct ocfs2_dio_write_ctxt *dwc,
> loff_t offset,
> @@ -2281,14 +2310,13 @@ static int ocfs2_dio_end_io_write(struct inode *inode,
>   struct ocfs2_extent_tree et;
>   struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
>   struct ocfs2_inode_info *oi = OCFS2_I(inode);
> - struct ocfs2_unwritten_extent *ue = NULL;
> + struct ocfs2_unwritten_extent *ue = NULL, *tmp_ue;
>   struct buffer_head *di_bh = NULL;
>   struct ocfs2_dinode *di;
> - struct ocfs2_alloc_context *data_ac = NULL;
>   struct ocfs2_alloc_context *meta_ac = NULL;
>   handle_t *handle = NULL;
>   loff_t end = offset + bytes;
> - int ret = 0, credits = 0, locked = 0;
> + int ret = 0, credits = 0, locked = 0, restart = 0;
> 
>   ocfs2_init_dealloc_ctxt(&dealloc);
> 
> @@ -2328,10 +2356,10 @@ static int ocfs2_dio_end_io_write(struct inode *inode,
> 
>   di = (struct ocfs2_dinode *)di_bh->b_data;
> 
> +restart_all:
>   ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
> -
> - ret = ocfs2_lock_allocators(inode, &et, 0, dwc->dw_zero_count*2,
> - &data_ac, &meta_ac);
> + ret = ocfs2_lock_allocators(inode, &et, 0, OCFS2_MAX_REC_NEEDED_SPLIT,
> + NULL, &meta_ac);
>   if (ret) {
>   mlog_errno(ret);
>   goto unlock;
> @@ -2343,7 +2371,7 @@ static int ocfs2_dio_end_io_write(struct inode *inode,
>   if (IS_ERR(handle)) {
>   ret = PTR_ERR(handle);
>   mlog_errno(ret);
> - goto unlock;
> + goto free_ac;
>   }
>   ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
> OCFS2_JOURNAL_ACCESS_WRITE);
> @@ -2352,7 +2380,17 @@ static int ocfs2_dio_end_io_write(struct inode *inode,
>   goto commit;
>   }
> 
> - list_for_each_entry(ue, &dwc->dw_zero_list, ue_node) {
> + list_for_each_entry_safe(ue, tmp_ue, &dwc->dw_zero_list, ue_node) {
> + ret = ocfs2_dio_should_restart(&et, meta_ac,
> + OCFS2_MAX_REC_NEEDED_SPLIT * 2);
> + if (ret < 0) {
> + mlog_errno(ret);
> + break;
> + } else if (ret == 1) {
> + restart = 1;
> + break;
> + }
> +
>   ret = ocfs2_mark_extent_written(inode, &et, handle,
>   ue->ue_cpos, 1,
>   ue->ue_phys,
> @@ -2361,24 +2399,37 @@ static int ocfs2_dio_end_io_write(struct inode *inode,
>   mlog_errno(ret);
>   break;
>   }
> +
> + dwc->dw_zero_count--;
> + list_del_init(&ue->ue_node);
> + spin_lock(&oi->ip_lock);
> + list_del_init(&ue->ue_ip_node);
> +   

[Ocfs2-devel] [PATCH v2] ocfs2: check the metadate alloc before marking extent written

2017-12-04 Thread alex chen
We need to check the free number of the records in each loop to mark
extent written, because the last extent block may be changed through
many times marking extent written and the 'num_free_extents' also be
changed. In the worst case, the 'num_free_extents' may become less
than the beginning of the loop. So we should not estimate the free
number of the records at the beginning of the loop to mark extent
written.

Reported-by: John Lightsey 
Signed-off-by: Alex Chen 
Reviewed-by: Jun Piao 
---
 fs/ocfs2/aops.c | 77 +++--
 1 file changed, 64 insertions(+), 13 deletions(-)

diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index d151632..7e1659d 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -2272,6 +2272,35 @@ static int ocfs2_dio_wr_get_block(struct inode *inode, 
sector_t iblock,
return ret;
 }

+static int ocfs2_dio_should_restart(struct ocfs2_extent_tree *et,
+   struct ocfs2_alloc_context *meta_ac, int max_rec_needed)
+{
+   int status = 0, free_extents;
+
+   free_extents = ocfs2_num_free_extents(et);
+   if (free_extents < 0) {
+   status = free_extents;
+   mlog_errno(status);
+   return status;
+   }
+
+   /*
+* there are two cases which could cause us to EAGAIN in the
+* we-need-more-metadata case:
+* 1) we haven't reserved *any*
+* 2) we are so fragmented, we've needed to add metadata too
+*many times.
+*/
+   if (free_extents < max_rec_needed) {
+   if (!meta_ac || (ocfs2_alloc_context_bits_left(meta_ac)
+   < ocfs2_extend_meta_needed(et->et_root_el)))
+   status = 1;
+   }
+
+   return status;
+}
+
+#define OCFS2_MAX_REC_NEEDED_SPLIT 2
 static int ocfs2_dio_end_io_write(struct inode *inode,
  struct ocfs2_dio_write_ctxt *dwc,
  loff_t offset,
@@ -2281,14 +2310,13 @@ static int ocfs2_dio_end_io_write(struct inode *inode,
struct ocfs2_extent_tree et;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct ocfs2_inode_info *oi = OCFS2_I(inode);
-   struct ocfs2_unwritten_extent *ue = NULL;
+   struct ocfs2_unwritten_extent *ue = NULL, *tmp_ue;
struct buffer_head *di_bh = NULL;
struct ocfs2_dinode *di;
-   struct ocfs2_alloc_context *data_ac = NULL;
struct ocfs2_alloc_context *meta_ac = NULL;
handle_t *handle = NULL;
loff_t end = offset + bytes;
-   int ret = 0, credits = 0, locked = 0;
+   int ret = 0, credits = 0, locked = 0, restart = 0;

ocfs2_init_dealloc_ctxt(&dealloc);

@@ -2328,10 +2356,10 @@ static int ocfs2_dio_end_io_write(struct inode *inode,

di = (struct ocfs2_dinode *)di_bh->b_data;

+restart_all:
ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
-
-   ret = ocfs2_lock_allocators(inode, &et, 0, dwc->dw_zero_count*2,
-   &data_ac, &meta_ac);
+   ret = ocfs2_lock_allocators(inode, &et, 0, OCFS2_MAX_REC_NEEDED_SPLIT,
+   NULL, &meta_ac);
if (ret) {
mlog_errno(ret);
goto unlock;
@@ -2343,7 +2371,7 @@ static int ocfs2_dio_end_io_write(struct inode *inode,
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
mlog_errno(ret);
-   goto unlock;
+   goto free_ac;
}
ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
  OCFS2_JOURNAL_ACCESS_WRITE);
@@ -2352,7 +2380,17 @@ static int ocfs2_dio_end_io_write(struct inode *inode,
goto commit;
}

-   list_for_each_entry(ue, &dwc->dw_zero_list, ue_node) {
+   list_for_each_entry_safe(ue, tmp_ue, &dwc->dw_zero_list, ue_node) {
+   ret = ocfs2_dio_should_restart(&et, meta_ac,
+   OCFS2_MAX_REC_NEEDED_SPLIT * 2);
+   if (ret < 0) {
+   mlog_errno(ret);
+   break;
+   } else if (ret == 1) {
+   restart = 1;
+   break;
+   }
+
ret = ocfs2_mark_extent_written(inode, &et, handle,
ue->ue_cpos, 1,
ue->ue_phys,
@@ -2361,24 +2399,37 @@ static int ocfs2_dio_end_io_write(struct inode *inode,
mlog_errno(ret);
break;
}
+
+   dwc->dw_zero_count--;
+   list_del_init(&ue->ue_node);
+   spin_lock(&oi->ip_lock);
+   list_del_init(&ue->ue_ip_node);
+   spin_unlock(&oi->ip_lock);
+   kfree(ue);
}

-   if (end > i_size_read(inode)) {
+   if (!restart && end > i_size_read(

Re: [Ocfs2-devel] [PATCH] ocfs2: check the metadate alloc before marking extent written

2017-12-04 Thread Joseph Qi


On 17/12/5 11:09, alex chen wrote:
> 
> 
> On 2017/12/5 10:45, Joseph Qi wrote:
>>
>>
>> On 17/12/5 10:36, alex chen wrote:
>>> Hi Joseph,
>>>
>>> Thanks for your reviews.
>>>
>>> On 2017/12/5 10:21, Joseph Qi wrote:


 On 17/12/5 09:41, alex chen wrote:
> We need to check the free number of the records in each loop to mark
> extent written, because the last extent block may be changed through
> many times marking extent written and the 'num_free_extents' also be
> changed. In the worst case, the 'num_free_extents' may become less
> than the beginning of the loop. So we should not estimate the free
> number of the records at the beginning of the loop to mark extent
> written.
>
> Reported-by: John Lightsey 
> Signed-off-by: Alex Chen 
> Reviewed-by: Jun Piao 
> ---
>  fs/ocfs2/aops.c | 76 
> -
>  1 file changed, 64 insertions(+), 12 deletions(-)
>
> diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
> index d151632..702aebc 100644
> --- a/fs/ocfs2/aops.c
> +++ b/fs/ocfs2/aops.c
> @@ -2272,6 +2272,35 @@ static int ocfs2_dio_wr_get_block(struct inode 
> *inode, sector_t iblock,
>   return ret;
>  }
>
> +static int ocfs2_dio_should_restart(struct ocfs2_extent_tree *et,
> + struct ocfs2_alloc_context *meta_ac, int max_rec_needed)
> +{
> + int status = 0, free_extents;
> +
> + free_extents = ocfs2_num_free_extents(et);
> + if (free_extents < 0) {
> + status = free_extents;
> + mlog_errno(status);
> + return status;
> + }
> +
> + /*
> +  * there are two cases which could cause us to EAGAIN in the
> +  * we-need-more-metadata case:
> +  * 1) we haven't reserved *any*
> +  * 2) we are so fragmented, we've needed to add metadata too
> +  *many times.
> +  */
> + if (free_extents < max_rec_needed) {
> + if (!meta_ac || (ocfs2_alloc_context_bits_left(meta_ac)
> + < ocfs2_extend_meta_needed(et->et_root_el)))
> + status = 1;
> + }
> +
> + return status;
> +}
> +
> +#define OCFS2_MAX_REC_NEEDED_SPLIT 2
>  static int ocfs2_dio_end_io_write(struct inode *inode,
> struct ocfs2_dio_write_ctxt *dwc,
> loff_t offset,
> @@ -2281,14 +2310,13 @@ static int ocfs2_dio_end_io_write(struct inode 
> *inode,
>   struct ocfs2_extent_tree et;
>   struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
>   struct ocfs2_inode_info *oi = OCFS2_I(inode);
> - struct ocfs2_unwritten_extent *ue = NULL;
> + struct ocfs2_unwritten_extent *ue = NULL, *tmp_ue;
>   struct buffer_head *di_bh = NULL;
>   struct ocfs2_dinode *di;
> - struct ocfs2_alloc_context *data_ac = NULL;
>   struct ocfs2_alloc_context *meta_ac = NULL;
>   handle_t *handle = NULL;
>   loff_t end = offset + bytes;
> - int ret = 0, credits = 0, locked = 0;
> + int ret = 0, credits = 0, locked = 0, restart = 0;
>
>   ocfs2_init_dealloc_ctxt(&dealloc);
>
> @@ -2330,8 +2358,9 @@ static int ocfs2_dio_end_io_write(struct inode 
> *inode,
>
 Should we restart here?
>>>
>>> OK. we should restart before initialized the inode extent tree.
>>>

>   ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
>
> - ret = ocfs2_lock_allocators(inode, &et, 0, dwc->dw_zero_count*2,
> - &data_ac, &meta_ac);
> +restart_all:
> + ret = ocfs2_lock_allocators(inode, &et, 0, OCFS2_MAX_REC_NEEDED_SPLIT,
> + NULL, &meta_ac);
>   if (ret) {
>   mlog_errno(ret);
>   goto unlock;
> @@ -2343,7 +2372,7 @@ static int ocfs2_dio_end_io_write(struct inode 
> *inode,
>   if (IS_ERR(handle)) {
>   ret = PTR_ERR(handle);
>   mlog_errno(ret);
> - goto unlock;
> + goto free_ac;
 I don't think it is a necessary change here.
>>>
>>> we need to free the 'meta_ac' which allocated in ocfs2_lock_allocators() 
>>> when we
>>> start transaction failed.
>>>
>> As I described in the last mail, just move the restart check after the
>> original free meta_ac check.
>>
>>> Thanks,
>>> Alex

>   }
>   ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
> OCFS2_JOURNAL_ACCESS_WRITE);
> @@ -2352,7 +2381,17 @@ static int ocfs2_dio_end_io_write(struct inode 
> *inode,
>   goto commit;
>   }
>
> - list_for_each_entry(ue, &dwc->dw_zero_list, ue_node) {
> + list_for_each_entry_safe(ue, tmp_ue, &dwc->dw_zero_list, ue_node) {
> + ret = ocfs2_dio_should_restart(&et, meta_ac,
> + OCFS2_MAX_REC_NEEDED_SPLIT * 2);
> +  

Re: [Ocfs2-devel] [PATCH] ocfs2: check the metadate alloc before marking extent written

2017-12-04 Thread alex chen


On 2017/12/5 10:45, Joseph Qi wrote:
> 
> 
> On 17/12/5 10:36, alex chen wrote:
>> Hi Joseph,
>>
>> Thanks for your reviews.
>>
>> On 2017/12/5 10:21, Joseph Qi wrote:
>>>
>>>
>>> On 17/12/5 09:41, alex chen wrote:
 We need to check the free number of the records in each loop to mark
 extent written, because the last extent block may be changed through
 many times marking extent written and the 'num_free_extents' also be
 changed. In the worst case, the 'num_free_extents' may become less
 than the beginning of the loop. So we should not estimate the free
 number of the records at the beginning of the loop to mark extent
 written.

 Reported-by: John Lightsey 
 Signed-off-by: Alex Chen 
 Reviewed-by: Jun Piao 
 ---
  fs/ocfs2/aops.c | 76 
 -
  1 file changed, 64 insertions(+), 12 deletions(-)

 diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
 index d151632..702aebc 100644
 --- a/fs/ocfs2/aops.c
 +++ b/fs/ocfs2/aops.c
 @@ -2272,6 +2272,35 @@ static int ocfs2_dio_wr_get_block(struct inode 
 *inode, sector_t iblock,
return ret;
  }

 +static int ocfs2_dio_should_restart(struct ocfs2_extent_tree *et,
 +  struct ocfs2_alloc_context *meta_ac, int max_rec_needed)
 +{
 +  int status = 0, free_extents;
 +
 +  free_extents = ocfs2_num_free_extents(et);
 +  if (free_extents < 0) {
 +  status = free_extents;
 +  mlog_errno(status);
 +  return status;
 +  }
 +
 +  /*
 +   * there are two cases which could cause us to EAGAIN in the
 +   * we-need-more-metadata case:
 +   * 1) we haven't reserved *any*
 +   * 2) we are so fragmented, we've needed to add metadata too
 +   *many times.
 +   */
 +  if (free_extents < max_rec_needed) {
 +  if (!meta_ac || (ocfs2_alloc_context_bits_left(meta_ac)
 +  < ocfs2_extend_meta_needed(et->et_root_el)))
 +  status = 1;
 +  }
 +
 +  return status;
 +}
 +
 +#define OCFS2_MAX_REC_NEEDED_SPLIT 2
  static int ocfs2_dio_end_io_write(struct inode *inode,
  struct ocfs2_dio_write_ctxt *dwc,
  loff_t offset,
 @@ -2281,14 +2310,13 @@ static int ocfs2_dio_end_io_write(struct inode 
 *inode,
struct ocfs2_extent_tree et;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct ocfs2_inode_info *oi = OCFS2_I(inode);
 -  struct ocfs2_unwritten_extent *ue = NULL;
 +  struct ocfs2_unwritten_extent *ue = NULL, *tmp_ue;
struct buffer_head *di_bh = NULL;
struct ocfs2_dinode *di;
 -  struct ocfs2_alloc_context *data_ac = NULL;
struct ocfs2_alloc_context *meta_ac = NULL;
handle_t *handle = NULL;
loff_t end = offset + bytes;
 -  int ret = 0, credits = 0, locked = 0;
 +  int ret = 0, credits = 0, locked = 0, restart = 0;

ocfs2_init_dealloc_ctxt(&dealloc);

 @@ -2330,8 +2358,9 @@ static int ocfs2_dio_end_io_write(struct inode 
 *inode,

>>> Should we restart here?
>>
>> OK. we should restart before initialized the inode extent tree.
>>
>>>
ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);

 -  ret = ocfs2_lock_allocators(inode, &et, 0, dwc->dw_zero_count*2,
 -  &data_ac, &meta_ac);
 +restart_all:
 +  ret = ocfs2_lock_allocators(inode, &et, 0, OCFS2_MAX_REC_NEEDED_SPLIT,
 +  NULL, &meta_ac);
if (ret) {
mlog_errno(ret);
goto unlock;
 @@ -2343,7 +2372,7 @@ static int ocfs2_dio_end_io_write(struct inode 
 *inode,
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
mlog_errno(ret);
 -  goto unlock;
 +  goto free_ac;
>>> I don't think it is a necessary change here.
>>
>> we need to free the 'meta_ac' which allocated in ocfs2_lock_allocators() 
>> when we
>> start transaction failed.
>>
> As I described in the last mail, just move the restart check after the
> original free meta_ac check.
> 
>> Thanks,
>> Alex
>>>
}
ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
  OCFS2_JOURNAL_ACCESS_WRITE);
 @@ -2352,7 +2381,17 @@ static int ocfs2_dio_end_io_write(struct inode 
 *inode,
goto commit;
}

 -  list_for_each_entry(ue, &dwc->dw_zero_list, ue_node) {
 +  list_for_each_entry_safe(ue, tmp_ue, &dwc->dw_zero_list, ue_node) {
 +  ret = ocfs2_dio_should_restart(&et, meta_ac,
 +  OCFS2_MAX_REC_NEEDED_SPLIT * 2);
 +  if (ret < 0) {
 +  mlog_errno(ret);
 +  break;
 +  } else if

Re: [Ocfs2-devel] [PATCH] ocfs2: check the metadate alloc before marking extent written

2017-12-04 Thread Joseph Qi


On 17/12/5 10:36, alex chen wrote:
> Hi Joseph,
> 
> Thanks for your reviews.
> 
> On 2017/12/5 10:21, Joseph Qi wrote:
>>
>>
>> On 17/12/5 09:41, alex chen wrote:
>>> We need to check the free number of the records in each loop to mark
>>> extent written, because the last extent block may be changed through
>>> many times marking extent written and the 'num_free_extents' also be
>>> changed. In the worst case, the 'num_free_extents' may become less
>>> than the beginning of the loop. So we should not estimate the free
>>> number of the records at the beginning of the loop to mark extent
>>> written.
>>>
>>> Reported-by: John Lightsey 
>>> Signed-off-by: Alex Chen 
>>> Reviewed-by: Jun Piao 
>>> ---
>>>  fs/ocfs2/aops.c | 76 
>>> -
>>>  1 file changed, 64 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
>>> index d151632..702aebc 100644
>>> --- a/fs/ocfs2/aops.c
>>> +++ b/fs/ocfs2/aops.c
>>> @@ -2272,6 +2272,35 @@ static int ocfs2_dio_wr_get_block(struct inode 
>>> *inode, sector_t iblock,
>>> return ret;
>>>  }
>>>
>>> +static int ocfs2_dio_should_restart(struct ocfs2_extent_tree *et,
>>> +   struct ocfs2_alloc_context *meta_ac, int max_rec_needed)
>>> +{
>>> +   int status = 0, free_extents;
>>> +
>>> +   free_extents = ocfs2_num_free_extents(et);
>>> +   if (free_extents < 0) {
>>> +   status = free_extents;
>>> +   mlog_errno(status);
>>> +   return status;
>>> +   }
>>> +
>>> +   /*
>>> +* there are two cases which could cause us to EAGAIN in the
>>> +* we-need-more-metadata case:
>>> +* 1) we haven't reserved *any*
>>> +* 2) we are so fragmented, we've needed to add metadata too
>>> +*many times.
>>> +*/
>>> +   if (free_extents < max_rec_needed) {
>>> +   if (!meta_ac || (ocfs2_alloc_context_bits_left(meta_ac)
>>> +   < ocfs2_extend_meta_needed(et->et_root_el)))
>>> +   status = 1;
>>> +   }
>>> +
>>> +   return status;
>>> +}
>>> +
>>> +#define OCFS2_MAX_REC_NEEDED_SPLIT 2
>>>  static int ocfs2_dio_end_io_write(struct inode *inode,
>>>   struct ocfs2_dio_write_ctxt *dwc,
>>>   loff_t offset,
>>> @@ -2281,14 +2310,13 @@ static int ocfs2_dio_end_io_write(struct inode 
>>> *inode,
>>> struct ocfs2_extent_tree et;
>>> struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
>>> struct ocfs2_inode_info *oi = OCFS2_I(inode);
>>> -   struct ocfs2_unwritten_extent *ue = NULL;
>>> +   struct ocfs2_unwritten_extent *ue = NULL, *tmp_ue;
>>> struct buffer_head *di_bh = NULL;
>>> struct ocfs2_dinode *di;
>>> -   struct ocfs2_alloc_context *data_ac = NULL;
>>> struct ocfs2_alloc_context *meta_ac = NULL;
>>> handle_t *handle = NULL;
>>> loff_t end = offset + bytes;
>>> -   int ret = 0, credits = 0, locked = 0;
>>> +   int ret = 0, credits = 0, locked = 0, restart = 0;
>>>
>>> ocfs2_init_dealloc_ctxt(&dealloc);
>>>
>>> @@ -2330,8 +2358,9 @@ static int ocfs2_dio_end_io_write(struct inode *inode,
>>>
>> Should we restart here?
> 
> OK. we should restart before initialized the inode extent tree.
> 
>>
>>> ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
>>>
>>> -   ret = ocfs2_lock_allocators(inode, &et, 0, dwc->dw_zero_count*2,
>>> -   &data_ac, &meta_ac);
>>> +restart_all:
>>> +   ret = ocfs2_lock_allocators(inode, &et, 0, OCFS2_MAX_REC_NEEDED_SPLIT,
>>> +   NULL, &meta_ac);
>>> if (ret) {
>>> mlog_errno(ret);
>>> goto unlock;
>>> @@ -2343,7 +2372,7 @@ static int ocfs2_dio_end_io_write(struct inode *inode,
>>> if (IS_ERR(handle)) {
>>> ret = PTR_ERR(handle);
>>> mlog_errno(ret);
>>> -   goto unlock;
>>> +   goto free_ac;
>> I don't think it is a necessary change here.
> 
> we need to free the 'meta_ac' which allocated in ocfs2_lock_allocators() when 
> we
> start transaction failed.
> 
As I described in the last mail, just move the restart check after the
original free meta_ac check.

> Thanks,
> Alex
>>
>>> }
>>> ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
>>>   OCFS2_JOURNAL_ACCESS_WRITE);
>>> @@ -2352,7 +2381,17 @@ static int ocfs2_dio_end_io_write(struct inode 
>>> *inode,
>>> goto commit;
>>> }
>>>
>>> -   list_for_each_entry(ue, &dwc->dw_zero_list, ue_node) {
>>> +   list_for_each_entry_safe(ue, tmp_ue, &dwc->dw_zero_list, ue_node) {
>>> +   ret = ocfs2_dio_should_restart(&et, meta_ac,
>>> +   OCFS2_MAX_REC_NEEDED_SPLIT * 2);
>>> +   if (ret < 0) {
>>> +   mlog_errno(ret);
>>> +   break;
>>> +   } else if (ret == 1) {
>>> +   restart = 1;
>>> +   break;
>>> +   }
>>> +
>>> re

Re: [Ocfs2-devel] [PATCH] ocfs2: check the metadate alloc before marking extent written

2017-12-04 Thread alex chen
Hi Joseph,

Thanks for your reviews.

On 2017/12/5 10:21, Joseph Qi wrote:
> 
> 
> On 17/12/5 09:41, alex chen wrote:
>> We need to check the free number of the records in each loop to mark
>> extent written, because the last extent block may be changed through
>> many times marking extent written and the 'num_free_extents' also be
>> changed. In the worst case, the 'num_free_extents' may become less
>> than the beginning of the loop. So we should not estimate the free
>> number of the records at the beginning of the loop to mark extent
>> written.
>>
>> Reported-by: John Lightsey 
>> Signed-off-by: Alex Chen 
>> Reviewed-by: Jun Piao 
>> ---
>>  fs/ocfs2/aops.c | 76 
>> -
>>  1 file changed, 64 insertions(+), 12 deletions(-)
>>
>> diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
>> index d151632..702aebc 100644
>> --- a/fs/ocfs2/aops.c
>> +++ b/fs/ocfs2/aops.c
>> @@ -2272,6 +2272,35 @@ static int ocfs2_dio_wr_get_block(struct inode 
>> *inode, sector_t iblock,
>>  return ret;
>>  }
>>
>> +static int ocfs2_dio_should_restart(struct ocfs2_extent_tree *et,
>> +struct ocfs2_alloc_context *meta_ac, int max_rec_needed)
>> +{
>> +int status = 0, free_extents;
>> +
>> +free_extents = ocfs2_num_free_extents(et);
>> +if (free_extents < 0) {
>> +status = free_extents;
>> +mlog_errno(status);
>> +return status;
>> +}
>> +
>> +/*
>> + * there are two cases which could cause us to EAGAIN in the
>> + * we-need-more-metadata case:
>> + * 1) we haven't reserved *any*
>> + * 2) we are so fragmented, we've needed to add metadata too
>> + *many times.
>> + */
>> +if (free_extents < max_rec_needed) {
>> +if (!meta_ac || (ocfs2_alloc_context_bits_left(meta_ac)
>> +< ocfs2_extend_meta_needed(et->et_root_el)))
>> +status = 1;
>> +}
>> +
>> +return status;
>> +}
>> +
>> +#define OCFS2_MAX_REC_NEEDED_SPLIT 2
>>  static int ocfs2_dio_end_io_write(struct inode *inode,
>>struct ocfs2_dio_write_ctxt *dwc,
>>loff_t offset,
>> @@ -2281,14 +2310,13 @@ static int ocfs2_dio_end_io_write(struct inode 
>> *inode,
>>  struct ocfs2_extent_tree et;
>>  struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
>>  struct ocfs2_inode_info *oi = OCFS2_I(inode);
>> -struct ocfs2_unwritten_extent *ue = NULL;
>> +struct ocfs2_unwritten_extent *ue = NULL, *tmp_ue;
>>  struct buffer_head *di_bh = NULL;
>>  struct ocfs2_dinode *di;
>> -struct ocfs2_alloc_context *data_ac = NULL;
>>  struct ocfs2_alloc_context *meta_ac = NULL;
>>  handle_t *handle = NULL;
>>  loff_t end = offset + bytes;
>> -int ret = 0, credits = 0, locked = 0;
>> +int ret = 0, credits = 0, locked = 0, restart = 0;
>>
>>  ocfs2_init_dealloc_ctxt(&dealloc);
>>
>> @@ -2330,8 +2358,9 @@ static int ocfs2_dio_end_io_write(struct inode *inode,
>>
> Should we restart here?

OK. we should restart before initialized the inode extent tree.

> 
>>  ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
>>
>> -ret = ocfs2_lock_allocators(inode, &et, 0, dwc->dw_zero_count*2,
>> -&data_ac, &meta_ac);
>> +restart_all:
>> +ret = ocfs2_lock_allocators(inode, &et, 0, OCFS2_MAX_REC_NEEDED_SPLIT,
>> +NULL, &meta_ac);
>>  if (ret) {
>>  mlog_errno(ret);
>>  goto unlock;
>> @@ -2343,7 +2372,7 @@ static int ocfs2_dio_end_io_write(struct inode *inode,
>>  if (IS_ERR(handle)) {
>>  ret = PTR_ERR(handle);
>>  mlog_errno(ret);
>> -goto unlock;
>> +goto free_ac;
> I don't think it is a necessary change here.

we need to free the 'meta_ac' which allocated in ocfs2_lock_allocators() when we
start transaction failed.

Thanks,
Alex
> 
>>  }
>>  ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
>>OCFS2_JOURNAL_ACCESS_WRITE);
>> @@ -2352,7 +2381,17 @@ static int ocfs2_dio_end_io_write(struct inode *inode,
>>  goto commit;
>>  }
>>
>> -list_for_each_entry(ue, &dwc->dw_zero_list, ue_node) {
>> +list_for_each_entry_safe(ue, tmp_ue, &dwc->dw_zero_list, ue_node) {
>> +ret = ocfs2_dio_should_restart(&et, meta_ac,
>> +OCFS2_MAX_REC_NEEDED_SPLIT * 2);
>> +if (ret < 0) {
>> +mlog_errno(ret);
>> +break;
>> +} else if (ret == 1) {
>> +restart = 1;
>> +break;
>> +}
>> +
>>  ret = ocfs2_mark_extent_written(inode, &et, handle,
>>  ue->ue_cpos, 1,
>>  ue->ue_phys,
>> @@ -2361,24 +2400,37 @@ static int ocfs2_dio

Re: [Ocfs2-devel] [PATCH] ocfs2: check the metadate alloc before marking extent written

2017-12-04 Thread Joseph Qi


On 17/12/5 09:41, alex chen wrote:
> We need to check the free number of the records in each loop to mark
> extent written, because the last extent block may be changed through
> many times marking extent written and the 'num_free_extents' also be
> changed. In the worst case, the 'num_free_extents' may become less
> than the beginning of the loop. So we should not estimate the free
> number of the records at the beginning of the loop to mark extent
> written.
> 
> Reported-by: John Lightsey 
> Signed-off-by: Alex Chen 
> Reviewed-by: Jun Piao 
> ---
>  fs/ocfs2/aops.c | 76 
> -
>  1 file changed, 64 insertions(+), 12 deletions(-)
> 
> diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
> index d151632..702aebc 100644
> --- a/fs/ocfs2/aops.c
> +++ b/fs/ocfs2/aops.c
> @@ -2272,6 +2272,35 @@ static int ocfs2_dio_wr_get_block(struct inode *inode, 
> sector_t iblock,
>   return ret;
>  }
> 
> +static int ocfs2_dio_should_restart(struct ocfs2_extent_tree *et,
> + struct ocfs2_alloc_context *meta_ac, int max_rec_needed)
> +{
> + int status = 0, free_extents;
> +
> + free_extents = ocfs2_num_free_extents(et);
> + if (free_extents < 0) {
> + status = free_extents;
> + mlog_errno(status);
> + return status;
> + }
> +
> + /*
> +  * there are two cases which could cause us to EAGAIN in the
> +  * we-need-more-metadata case:
> +  * 1) we haven't reserved *any*
> +  * 2) we are so fragmented, we've needed to add metadata too
> +  *many times.
> +  */
> + if (free_extents < max_rec_needed) {
> + if (!meta_ac || (ocfs2_alloc_context_bits_left(meta_ac)
> + < ocfs2_extend_meta_needed(et->et_root_el)))
> + status = 1;
> + }
> +
> + return status;
> +}
> +
> +#define OCFS2_MAX_REC_NEEDED_SPLIT 2
>  static int ocfs2_dio_end_io_write(struct inode *inode,
> struct ocfs2_dio_write_ctxt *dwc,
> loff_t offset,
> @@ -2281,14 +2310,13 @@ static int ocfs2_dio_end_io_write(struct inode *inode,
>   struct ocfs2_extent_tree et;
>   struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
>   struct ocfs2_inode_info *oi = OCFS2_I(inode);
> - struct ocfs2_unwritten_extent *ue = NULL;
> + struct ocfs2_unwritten_extent *ue = NULL, *tmp_ue;
>   struct buffer_head *di_bh = NULL;
>   struct ocfs2_dinode *di;
> - struct ocfs2_alloc_context *data_ac = NULL;
>   struct ocfs2_alloc_context *meta_ac = NULL;
>   handle_t *handle = NULL;
>   loff_t end = offset + bytes;
> - int ret = 0, credits = 0, locked = 0;
> + int ret = 0, credits = 0, locked = 0, restart = 0;
> 
>   ocfs2_init_dealloc_ctxt(&dealloc);
> 
> @@ -2330,8 +2358,9 @@ static int ocfs2_dio_end_io_write(struct inode *inode,
> 
Should we restart here?

>   ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
> 
> - ret = ocfs2_lock_allocators(inode, &et, 0, dwc->dw_zero_count*2,
> - &data_ac, &meta_ac);
> +restart_all:
> + ret = ocfs2_lock_allocators(inode, &et, 0, OCFS2_MAX_REC_NEEDED_SPLIT,
> + NULL, &meta_ac);
>   if (ret) {
>   mlog_errno(ret);
>   goto unlock;
> @@ -2343,7 +2372,7 @@ static int ocfs2_dio_end_io_write(struct inode *inode,
>   if (IS_ERR(handle)) {
>   ret = PTR_ERR(handle);
>   mlog_errno(ret);
> - goto unlock;
> + goto free_ac;
I don't think it is a necessary change here.

>   }
>   ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
> OCFS2_JOURNAL_ACCESS_WRITE);
> @@ -2352,7 +2381,17 @@ static int ocfs2_dio_end_io_write(struct inode *inode,
>   goto commit;
>   }
> 
> - list_for_each_entry(ue, &dwc->dw_zero_list, ue_node) {
> + list_for_each_entry_safe(ue, tmp_ue, &dwc->dw_zero_list, ue_node) {
> + ret = ocfs2_dio_should_restart(&et, meta_ac,
> + OCFS2_MAX_REC_NEEDED_SPLIT * 2);
> + if (ret < 0) {
> + mlog_errno(ret);
> + break;
> + } else if (ret == 1) {
> + restart = 1;
> + break;
> + }
> +
>   ret = ocfs2_mark_extent_written(inode, &et, handle,
>   ue->ue_cpos, 1,
>   ue->ue_phys,
> @@ -2361,24 +2400,37 @@ static int ocfs2_dio_end_io_write(struct inode *inode,
>   mlog_errno(ret);
>   break;
>   }
> +
> + dwc->dw_zero_count--;
> + list_del_init(&ue->ue_node);
> + spin_lock(&oi->ip_lock);
> + list_del_init(&ue->ue_ip_node);
> + s

[Ocfs2-devel] [PATCH] ocfs2: check the metadate alloc before marking extent written

2017-12-04 Thread alex chen
We need to check the free number of the records in each loop to mark
extent written, because the last extent block may be changed through
many times marking extent written and the 'num_free_extents' also be
changed. In the worst case, the 'num_free_extents' may become less
than the beginning of the loop. So we should not estimate the free
number of the records at the beginning of the loop to mark extent
written.

Reported-by: John Lightsey 
Signed-off-by: Alex Chen 
Reviewed-by: Jun Piao 
---
 fs/ocfs2/aops.c | 76 -
 1 file changed, 64 insertions(+), 12 deletions(-)

diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index d151632..702aebc 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -2272,6 +2272,35 @@ static int ocfs2_dio_wr_get_block(struct inode *inode, 
sector_t iblock,
return ret;
 }

+static int ocfs2_dio_should_restart(struct ocfs2_extent_tree *et,
+   struct ocfs2_alloc_context *meta_ac, int max_rec_needed)
+{
+   int status = 0, free_extents;
+
+   free_extents = ocfs2_num_free_extents(et);
+   if (free_extents < 0) {
+   status = free_extents;
+   mlog_errno(status);
+   return status;
+   }
+
+   /*
+* there are two cases which could cause us to EAGAIN in the
+* we-need-more-metadata case:
+* 1) we haven't reserved *any*
+* 2) we are so fragmented, we've needed to add metadata too
+*many times.
+*/
+   if (free_extents < max_rec_needed) {
+   if (!meta_ac || (ocfs2_alloc_context_bits_left(meta_ac)
+   < ocfs2_extend_meta_needed(et->et_root_el)))
+   status = 1;
+   }
+
+   return status;
+}
+
+#define OCFS2_MAX_REC_NEEDED_SPLIT 2
 static int ocfs2_dio_end_io_write(struct inode *inode,
  struct ocfs2_dio_write_ctxt *dwc,
  loff_t offset,
@@ -2281,14 +2310,13 @@ static int ocfs2_dio_end_io_write(struct inode *inode,
struct ocfs2_extent_tree et;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct ocfs2_inode_info *oi = OCFS2_I(inode);
-   struct ocfs2_unwritten_extent *ue = NULL;
+   struct ocfs2_unwritten_extent *ue = NULL, *tmp_ue;
struct buffer_head *di_bh = NULL;
struct ocfs2_dinode *di;
-   struct ocfs2_alloc_context *data_ac = NULL;
struct ocfs2_alloc_context *meta_ac = NULL;
handle_t *handle = NULL;
loff_t end = offset + bytes;
-   int ret = 0, credits = 0, locked = 0;
+   int ret = 0, credits = 0, locked = 0, restart = 0;

ocfs2_init_dealloc_ctxt(&dealloc);

@@ -2330,8 +2358,9 @@ static int ocfs2_dio_end_io_write(struct inode *inode,

ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);

-   ret = ocfs2_lock_allocators(inode, &et, 0, dwc->dw_zero_count*2,
-   &data_ac, &meta_ac);
+restart_all:
+   ret = ocfs2_lock_allocators(inode, &et, 0, OCFS2_MAX_REC_NEEDED_SPLIT,
+   NULL, &meta_ac);
if (ret) {
mlog_errno(ret);
goto unlock;
@@ -2343,7 +2372,7 @@ static int ocfs2_dio_end_io_write(struct inode *inode,
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
mlog_errno(ret);
-   goto unlock;
+   goto free_ac;
}
ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
  OCFS2_JOURNAL_ACCESS_WRITE);
@@ -2352,7 +2381,17 @@ static int ocfs2_dio_end_io_write(struct inode *inode,
goto commit;
}

-   list_for_each_entry(ue, &dwc->dw_zero_list, ue_node) {
+   list_for_each_entry_safe(ue, tmp_ue, &dwc->dw_zero_list, ue_node) {
+   ret = ocfs2_dio_should_restart(&et, meta_ac,
+   OCFS2_MAX_REC_NEEDED_SPLIT * 2);
+   if (ret < 0) {
+   mlog_errno(ret);
+   break;
+   } else if (ret == 1) {
+   restart = 1;
+   break;
+   }
+
ret = ocfs2_mark_extent_written(inode, &et, handle,
ue->ue_cpos, 1,
ue->ue_phys,
@@ -2361,24 +2400,37 @@ static int ocfs2_dio_end_io_write(struct inode *inode,
mlog_errno(ret);
break;
}
+
+   dwc->dw_zero_count--;
+   list_del_init(&ue->ue_node);
+   spin_lock(&oi->ip_lock);
+   list_del_init(&ue->ue_ip_node);
+   spin_unlock(&oi->ip_lock);
+   kfree(ue);
}

-   if (end > i_size_read(inode)) {
+   if (!restart && end > i_size_read(inode)) {
ret = ocfs2_set_inode_size(ha