That is a good suggestion, thanks.

Andreas Dilger wrote on 2020/8/7 16:15:
> It would be good to include these performance results in the commit message, 
> so that the results are available to the patch reviewers and in the future if 
> this code is changed.
> 
> On Wed, Aug 5, 2020 at 3:16 AM brookxu <brookxu...@gmail.com 
> <mailto:brookxu...@gmail.com>> wrote:
> 
>     To add more detail: as we expected, the running time of the test process is
>     reduced significantly.
> 
>     Running time on the unpatched kernel:
>     [root@TENCENT64 ~]# time taskset 0x01 ./sparse /data1/sparce.dat
>     real    0m2.051s
>     user    0m0.008s
>     sys    0m2.026s
> 
>     Running time on the patched kernel:
>     [root@TENCENT64 ~]# time taskset 0x01 ./sparse /data1/sparce.dat
>     real    0m0.471s
>     user    0m0.004s
>     sys        0m0.395s
> 
>     Thanks.
> 
>     Andreas Dilger wrote on 2020/8/5 12:53:
>     > On Aug 4, 2020, at 7:02 PM, brookxu <brookxu...@gmail.com 
> <mailto:brookxu...@gmail.com>> wrote:
>     >> In the scenario of writing sparse files, the per-inode prealloc list may
>     >> be very long, resulting in high overhead for ext4_mb_use_preallocated().
>     >> To circumvent this problem, we limit the maximum length of the per-inode
>     >> prealloc list to 512 and allow users to modify it.
>     >>
>     >> Signed-off-by: Chunguang Xu <broo...@tencent.com 
> <mailto:broo...@tencent.com>>
>     > Do you have any kind of measurements that show the benefit of this patch?
>     > For example performance improvement, memory or CPU usage before and after?
>     > How long is "very long"?
>     >
>     > Cheers, Andreas
>     >
>     >> ---
>     >>  fs/ext4/ext4.h        |  3 ++-
>     >>  fs/ext4/extents.c     | 10 ++++-----
>     >>  fs/ext4/file.c        |  2 +-
>     >>  fs/ext4/indirect.c    |  2 +-
>     >>  fs/ext4/inode.c       |  6 +++---
>     >>  fs/ext4/ioctl.c       |  2 +-
>     >>  fs/ext4/mballoc.c     | 57 
> +++++++++++++++++++++++++++++++++++++++++++++++----
>     >>  fs/ext4/mballoc.h     |  4 ++++
>     >>  fs/ext4/move_extent.c |  4 ++--
>     >>  fs/ext4/super.c       |  2 +-
>     >>  fs/ext4/sysfs.c       |  2 ++
>     >>  11 files changed, 75 insertions(+), 19 deletions(-)
>     >>
>     >> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
>     >> index 42f5060..68e0ebe 100644
>     >> --- a/fs/ext4/ext4.h
>     >> +++ b/fs/ext4/ext4.h
>     >> @@ -1501,6 +1501,7 @@ struct ext4_sb_info {
>     >>      unsigned int s_mb_stats;
>     >>      unsigned int s_mb_order2_reqs;
>     >>      unsigned int s_mb_group_prealloc;
>     >> +    unsigned int s_mb_max_inode_prealloc;
>     >>      unsigned int s_max_dir_size_kb;
>     >>      /* where last allocation was done - for stream allocation */
>     >>      unsigned long s_mb_last_group;
>     >> @@ -2651,7 +2652,7 @@ extern int ext4_init_inode_table(struct 
> super_block *sb,
>     >>  extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
>     >>                  struct ext4_allocation_request *, int *);
>     >>  extern int ext4_mb_reserve_blocks(struct super_block *, int);
>     >> -extern void ext4_discard_preallocations(struct inode *);
>     >> +extern void ext4_discard_preallocations(struct inode *, unsigned int);
>     >>  extern int __init ext4_init_mballoc(void);
>     >>  extern void ext4_exit_mballoc(void);
>     >>  extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
>     >> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
>     >> index 221f240..a40f928 100644
>     >> --- a/fs/ext4/extents.c
>     >> +++ b/fs/ext4/extents.c
>     >> @@ -100,7 +100,7 @@ static int ext4_ext_trunc_restart_fn(struct inode 
> *inode, int *dropped)
>     >>       * i_mutex. So we can safely drop the i_data_sem here.
>     >>       */
>     >>      BUG_ON(EXT4_JOURNAL(inode) == NULL);
>     >> -    ext4_discard_preallocations(inode);
>     >> +    ext4_discard_preallocations(inode, 0);
>     >>      up_write(&EXT4_I(inode)->i_data_sem);
>     >>      *dropped = 1;
>     >>      return 0;
>     >> @@ -4272,7 +4272,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct 
> inode *inode,
>     >>               * not a good idea to call discard here directly,
>     >>               * but otherwise we'd need to call it every free().
>     >>               */
>     >> -            ext4_discard_preallocations(inode);
>     >> +            ext4_discard_preallocations(inode, 0);
>     >>              if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
>     >>                  fb_flags = EXT4_FREE_BLOCKS_NO_QUOT_UPDATE;
>     >>              ext4_free_blocks(handle, inode, NULL, newblock,
>     >> @@ -5299,7 +5299,7 @@ static int ext4_collapse_range(struct inode 
> *inode, loff_t offset, loff_t len)
>     >>      }
>     >>
>     >>      down_write(&EXT4_I(inode)->i_data_sem);
>     >> -    ext4_discard_preallocations(inode);
>     >> +    ext4_discard_preallocations(inode, 0);
>     >>
>     >>      ret = ext4_es_remove_extent(inode, punch_start,
>     >>                      EXT_MAX_BLOCKS - punch_start);
>     >> @@ -5313,7 +5313,7 @@ static int ext4_collapse_range(struct inode 
> *inode, loff_t offset, loff_t len)
>     >>          up_write(&EXT4_I(inode)->i_data_sem);
>     >>          goto out_stop;
>     >>      }
>     >> -    ext4_discard_preallocations(inode);
>     >> +    ext4_discard_preallocations(inode, 0);
>     >>
>     >>      ret = ext4_ext_shift_extents(inode, handle, punch_stop,
>     >>                       punch_stop - punch_start, SHIFT_LEFT);
>     >> @@ -5445,7 +5445,7 @@ static int ext4_insert_range(struct inode 
> *inode, loff_t offset, loff_t len)
>     >>          goto out_stop;
>     >>
>     >>      down_write(&EXT4_I(inode)->i_data_sem);
>     >> -    ext4_discard_preallocations(inode);
>     >> +    ext4_discard_preallocations(inode, 0);
>     >>
>     >>      path = ext4_find_extent(inode, offset_lblk, NULL, 0);
>     >>      if (IS_ERR(path)) {
>     >> diff --git a/fs/ext4/file.c b/fs/ext4/file.c
>     >> index 2a01e31..e3ab8ea 100644
>     >> --- a/fs/ext4/file.c
>     >> +++ b/fs/ext4/file.c
>     >> @@ -148,7 +148,7 @@ static int ext4_release_file(struct inode *inode, 
> struct file *filp)
>     >>                  !EXT4_I(inode)->i_reserved_data_blocks)
>     >>      {
>     >>          down_write(&EXT4_I(inode)->i_data_sem);
>     >> -        ext4_discard_preallocations(inode);
>     >> +        ext4_discard_preallocations(inode, 0);
>     >>          up_write(&EXT4_I(inode)->i_data_sem);
>     >>      }
>     >>      if (is_dx(inode) && filp->private_data)
>     >> diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
>     >> index be2b66e..ec6b930 100644
>     >> --- a/fs/ext4/indirect.c
>     >> +++ b/fs/ext4/indirect.c
>     >> @@ -696,7 +696,7 @@ static int ext4_ind_trunc_restart_fn(handle_t 
> *handle, struct inode *inode,
>     >>       * i_mutex. So we can safely drop the i_data_sem here.
>     >>       */
>     >>      BUG_ON(EXT4_JOURNAL(inode) == NULL);
>     >> -    ext4_discard_preallocations(inode);
>     >> +    ext4_discard_preallocations(inode, 0);
>     >>      up_write(&EXT4_I(inode)->i_data_sem);
>     >>      *dropped = 1;
>     >>      return 0;
>     >> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
>     >> index 10dd470..bb9e1cd 100644
>     >> --- a/fs/ext4/inode.c
>     >> +++ b/fs/ext4/inode.c
>     >> @@ -383,7 +383,7 @@ void ext4_da_update_reserve_space(struct inode 
> *inode,
>     >>       */
>     >>      if ((ei->i_reserved_data_blocks == 0) &&
>     >>          !inode_is_open_for_write(inode))
>     >> -        ext4_discard_preallocations(inode);
>     >> +        ext4_discard_preallocations(inode, 0);
>     >>  }
>     >>
>     >>  static int __check_block_validity(struct inode *inode, const char 
> *func,
>     >> @@ -4056,7 +4056,7 @@ int ext4_punch_hole(struct inode *inode, loff_t 
> offset, loff_t length)
>     >>      if (stop_block > first_block) {
>     >>
>     >>          down_write(&EXT4_I(inode)->i_data_sem);
>     >> -        ext4_discard_preallocations(inode);
>     >> +        ext4_discard_preallocations(inode, 0);
>     >>
>     >>          ret = ext4_es_remove_extent(inode, first_block,
>     >>                          stop_block - first_block);
>     >> @@ -4211,7 +4211,7 @@ int ext4_truncate(struct inode *inode)
>     >>
>     >>      down_write(&EXT4_I(inode)->i_data_sem);
>     >>
>     >> -    ext4_discard_preallocations(inode);
>     >> +    ext4_discard_preallocations(inode, 0);
>     >>
>     >>      if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
>     >>          err = ext4_ext_truncate(handle, inode);
>     >> diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
>     >> index 999cf6a..a5fcc23 100644
>     >> --- a/fs/ext4/ioctl.c
>     >> +++ b/fs/ext4/ioctl.c
>     >> @@ -202,7 +202,7 @@ static long swap_inode_boot_loader(struct 
> super_block *sb,
>     >>      reset_inode_seed(inode);
>     >>      reset_inode_seed(inode_bl);
>     >>
>     >> -    ext4_discard_preallocations(inode);
>     >> +    ext4_discard_preallocations(inode, 0);
>     >>
>     >>      err = ext4_mark_inode_dirty(handle, inode);
>     >>      if (err < 0) {
>     >> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
>     >> index 4f21f34..28a139f 100644
>     >> --- a/fs/ext4/mballoc.c
>     >> +++ b/fs/ext4/mballoc.c
>     >> @@ -2736,6 +2736,7 @@ int ext4_mb_init(struct super_block *sb)
>     >>      sbi->s_mb_stats = MB_DEFAULT_STATS;
>     >>      sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD;
>     >>      sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
>     >> +    sbi->s_mb_max_inode_prealloc = MB_DEFAULT_MAX_INODE_PREALLOC;
>     >>      /*
>     >>       * The default group preallocation is 512, which for 4k block
>     >>       * sizes translates to 2 megabytes.  However for bigalloc file
>     >> @@ -4103,7 +4104,7 @@ static void ext4_mb_new_preallocation(struct 
> ext4_allocation_context *ac)
>     >>   *
>     >>   * FIXME!! Make sure it is valid at all the call sites
>     >>   */
>     >> -void ext4_discard_preallocations(struct inode *inode)
>     >> +void ext4_discard_preallocations(struct inode *inode, unsigned int 
> needed)
>     >>  {
>     >>      struct ext4_inode_info *ei = EXT4_I(inode);
>     >>      struct super_block *sb = inode->i_sb;
>     >> @@ -4121,15 +4122,18 @@ void ext4_discard_preallocations(struct inode 
> *inode)
>     >>
>     >>      mb_debug(sb, "discard preallocation for inode %lu\n",
>     >>           inode->i_ino);
>     >> -    trace_ext4_discard_preallocations(inode);
>     >> +    trace_ext4_discard_preallocations(inode,  needed);
>     >>
>     >>      INIT_LIST_HEAD(&list);
>     >>
>     >> +    if (needed == 0)
>     >> +        needed = UINT_MAX;
>     >> +
>     >>  repeat:
>     >>      /* first, collect all pa's in the inode */
>     >>      spin_lock(&ei->i_prealloc_lock);
>     >> -    while (!list_empty(&ei->i_prealloc_list)) {
>     >> -        pa = list_entry(ei->i_prealloc_list.next,
>     >> +    while (!list_empty(&ei->i_prealloc_list) && needed) {
>     >> +        pa = list_entry(ei->i_prealloc_list.prev,
>     >>                  struct ext4_prealloc_space, pa_inode_list);
>     >>          BUG_ON(pa->pa_obj_lock != &ei->i_prealloc_lock);
>     >>          spin_lock(&pa->pa_lock);
>     >> @@ -4150,6 +4154,7 @@ void ext4_discard_preallocations(struct inode 
> *inode)
>     >>              spin_unlock(&pa->pa_lock);
>     >>              list_del_rcu(&pa->pa_inode_list);
>     >>              list_add(&pa->u.pa_tmp_list, &list);
>     >> +            needed--;
>     >>              continue;
>     >>          }
>     >>
>     >> @@ -4549,10 +4554,42 @@ static void ext4_mb_add_n_trim(struct 
> ext4_allocation_context *ac)
>     >>  }
>     >>
>     >>  /*
>     >> + * if per-inode prealloc list is too long, trim some PA
>     >> + */
>     >> +static void
>     >> +ext4_mb_trim_inode_pa(struct inode *inode)
>     >> +{
>     >> +    struct ext4_inode_info *ei = EXT4_I(inode);
>     >> +    struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
>     >> +    struct ext4_prealloc_space *pa;
>     >> +    int count = 0, delta;
>     >> +
>     >> +    rcu_read_lock();
>     >> +    list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
>     >> +        spin_lock(&pa->pa_lock);
>     >> +        if (pa->pa_deleted) {
>     >> +            spin_unlock(&pa->pa_lock);
>     >> +            continue;
>     >> +        }
>     >> +        count++;
>     >> +        spin_unlock(&pa->pa_lock);
>     >> +    }
>     >> +    rcu_read_unlock();
>     >> +
>     >> +    delta = (sbi->s_mb_max_inode_prealloc >> 2) + 1;
>     >> +    if (count > sbi->s_mb_max_inode_prealloc + delta) {
>     >> +        count -= sbi->s_mb_max_inode_prealloc;
>     >> +        ext4_discard_preallocations(inode, count);
>     >> +    }
>     >> +}
>     >> +
>     >> +/*
>     >>   * release all resource we used in allocation
>     >>   */
>     >>  static int ext4_mb_release_context(struct ext4_allocation_context *ac)
>     >>  {
>     >> +    struct inode *inode = ac->ac_inode;
>     >> +    struct ext4_inode_info *ei = EXT4_I(inode);
>     >>      struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
>     >>      struct ext4_prealloc_space *pa = ac->ac_pa;
>     >>      if (pa) {
>     >> @@ -4578,6 +4615,17 @@ static int ext4_mb_release_context(struct 
> ext4_allocation_context *ac)
>     >>                  ext4_mb_add_n_trim(ac);
>     >>              }
>     >>          }
>     >> +
>     >> +        if (pa->pa_type == MB_INODE_PA) {
>     >> +            /*
>     >> +             * treat per-inode prealloc list as a lru list, then try
>     >> +             * to trim the least recently used PA.
>     >> +             */
>     >> +            spin_lock(pa->pa_obj_lock);
>     >> +            list_move(&ei->i_prealloc_list, &pa->pa_inode_list);
>     >> +            spin_unlock(pa->pa_obj_lock);
>     >> +        }
>     >> +
>     >>          ext4_mb_put_pa(ac, ac->ac_sb, pa);
>     >>      }
>     >>      if (ac->ac_bitmap_page)
>     >> @@ -4587,6 +4635,7 @@ static int ext4_mb_release_context(struct 
> ext4_allocation_context *ac)
>     >>      if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
>     >>          mutex_unlock(&ac->ac_lg->lg_mutex);
>     >>      ext4_mb_collect_stats(ac);
>     >> +    ext4_mb_trim_inode_pa(inode);
>     >>      return 0;
>     >>  }
>     >>
>     >> diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
>     >> index 6b4d17c..e75b474 100644
>     >> --- a/fs/ext4/mballoc.h
>     >> +++ b/fs/ext4/mballoc.h
>     >> @@ -73,6 +73,10 @@
>     >>   */
>     >>  #define MB_DEFAULT_GROUP_PREALLOC    512
>     >>
>     >> +/*
>     >> + * maximum length of inode prealloc list
>     >> + */
>     >> +#define MB_DEFAULT_MAX_INODE_PREALLOC    512
>     >>
>     >>  struct ext4_free_data {
>     >>      /* this links the free block information from sb_info */
>     >> diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
>     >> index 1ed86fb..0d601b8 100644
>     >> --- a/fs/ext4/move_extent.c
>     >> +++ b/fs/ext4/move_extent.c
>     >> @@ -686,8 +686,8 @@
>     >>
>     >>  out:
>     >>      if (*moved_len) {
>     >> -        ext4_discard_preallocations(orig_inode);
>     >> -        ext4_discard_preallocations(donor_inode);
>     >> +        ext4_discard_preallocations(orig_inode, 0);
>     >> +        ext4_discard_preallocations(donor_inode, 0);
>     >>      }
>     >>
>     >>      ext4_ext_drop_refs(path);
>     >> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
>     >> index 330957e..8ce61f3 100644
>     >> --- a/fs/ext4/super.c
>     >> +++ b/fs/ext4/super.c
>     >> @@ -1216,7 +1216,7 @@ void ext4_clear_inode(struct inode *inode)
>     >>  {
>     >>      invalidate_inode_buffers(inode);
>     >>      clear_inode(inode);
>     >> -    ext4_discard_preallocations(inode);
>     >> +    ext4_discard_preallocations(inode, 0);
>     >>      ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
>     >>      dquot_drop(inode);
>     >>      if (EXT4_I(inode)->jinode) {
>     >> diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
>     >> index 6c9fc9e..92f04e9 100644
>     >> --- a/fs/ext4/sysfs.c
>     >> +++ b/fs/ext4/sysfs.c
>     >> @@ -215,6 +215,7 @@ static ssize_t journal_task_show(struct 
> ext4_sb_info *sbi, char *buf)
>     >>  EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
>     >>  EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
>     >>  EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
>     >> +EXT4_RW_ATTR_SBI_UI(mb_max_inode_prealloc, s_mb_max_inode_prealloc);
>     >>  EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb);
>     >>  EXT4_ATTR(trigger_fs_error, 0200, trigger_test_error);
>     >>  EXT4_RW_ATTR_SBI_UI(err_ratelimit_interval_ms, 
> s_err_ratelimit_state.interval);
>     >> @@ -257,6 +258,7 @@ static ssize_t journal_task_show(struct 
> ext4_sb_info *sbi, char *buf)
>     >>      ATTR_LIST(mb_order2_req),
>     >>      ATTR_LIST(mb_stream_req),
>     >>      ATTR_LIST(mb_group_prealloc),
>     >> +    ATTR_LIST(mb_max_inode_prealloc),
>     >>      ATTR_LIST(max_writeback_mb_bump),
>     >>      ATTR_LIST(extent_max_zeroout_kb),
>     >>      ATTR_LIST(trigger_fs_error),
>     >>
>     >> --
>     >> 1.8.3.1
>     >>
>     >
>     > Cheers, Andreas
>     >
>     >
>     >
>     >
>     >
> 

Reply via email to