The commit is pushed to "branch-rh7-3.10.0-327.18.2.vz7.14.x-ovz" and will 
appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-327.18.2.vz7.14.26
------>
commit 762758f3d67255a262b2f4b44f6694d557896aed
Author: Dmitry Monakhov <dmonak...@openvz.org>
Date:   Sun Jul 17 17:33:25 2016 +0400

    ms/ext4: improve ext4lazyinit scalability
    
    ext4lazyinit is a global thread. This thread performs itable
    initialization under the li_list_mtx mutex.
    
    It basically does the following:
    ext4_lazyinit_thread
      ->mutex_lock(&eli->li_list_mtx);
      ->ext4_run_li_request(elr)
        ->ext4_init_inode_table -> does a lot of IO if the list is large
    
    And when new mounts/umounts arrive, they have to block on ->li_list_mtx
    because the lazy thread holds it during the full walk procedure.
    ext4_fill_super
     ->ext4_register_li_request
       ->mutex_lock(&ext4_li_info->li_list_mtx);
       ->list_add(&elr->lr_request, &ext4_li_info->li_request_list);
    In my case a mount takes 40 minutes on a server with 36 * 4Tb HDDs.
    An ordinary user may face this in case of a very slow device
    (/dev/mmcblkXXX). Even worse: if one of the filesystems is frozen,
    lazyinit_thread will simply block on sb_start_write(), so other
    mounts/umounts will hang forever.
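    
    To make the old behaviour concrete, here is a simplified sketch of the
    pre-patch loop (schematic only, not the literal code; compare the
    removed lines in the diff below):
    
      mutex_lock(&eli->li_list_mtx);
      list_for_each_safe(pos, n, &eli->li_request_list) {
              elr = list_entry(pos, struct ext4_li_request, lr_request);
              /* ext4_run_li_request() used to call sb_start_write(),
               * which blocks if the sb is frozen, and then do the
               * itable IO, all while li_list_mtx was still held */
              ext4_run_li_request(elr);
      }
      mutex_unlock(&eli->li_list_mtx);  /* mounts/umounts wait all this time */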
    
    This patch changes the logic as follows:
    - grab the ->s_umount read sem before processing a new li_request; after
      that it is safe to drop li_list_mtx, because all callers of
      ext4_remove_li_request hold ->s_umount for write (see the sketch after
      this list).
    - li_thread skips frozen SBs.
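    
    In sketch form, the new per-request handling looks like this (simplified
    from the diff below; the time-based rescheduling is omitted):
    
      mutex_lock(&eli->li_list_mtx);
      list_splice_init(&eli->li_request_list, &request_list);
      while (!list_empty(&request_list)) {
              int err = 0;
    
              elr = list_entry(request_list.next,
                               struct ext4_li_request, lr_request);
              list_move(request_list.next, &eli->li_request_list);
              if (down_read_trylock(&elr->lr_super->s_umount)) {
                      if (sb_start_write_trylock(elr->lr_super)) {
                              /* sb is pinned and not frozen: safe to
                               * drop li_list_mtx for the duration of
                               * the IO */
                              mutex_unlock(&eli->li_list_mtx);
                              err = ext4_run_li_request(elr);
                              sb_end_write(elr->lr_super);
                              mutex_lock(&eli->li_list_mtx);
                      }
                      up_read(&elr->lr_super->s_umount);
              }
              /* if either trylock fails, the request is left on the
               * list and simply retried on a later wakeup */
      }
      mutex_unlock(&eli->li_list_mtx);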
    
    Locking:
    The locking order is asserted by the umount path as follows:
    s_umount -> li_list_mtx, so the only way to grab ->s_umount inside
    li_thread is via down_read_trylock().
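    
    For context, a schematic of the inverse side of that ordering, assuming
    the usual unregister path is reached with s_umount already held for
    write (as on umount):
    
      /* umount: s_umount (write) is taken first, then li_list_mtx,
       * establishing the order s_umount -> li_list_mtx */
      down_write(&sb->s_umount);
      ...
      mutex_lock(&ext4_li_info->li_list_mtx);
      ext4_remove_li_request(elr);
      mutex_unlock(&ext4_li_info->li_list_mtx);
    
    li_thread already holds li_list_mtx when it needs ->s_umount, so a
    blocking down_read() there could deadlock against umount; hence
    down_read_trylock(), with the request retried later on failure.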
    
    https://jira.sw.ru/browse/PSBM-49658
    
    Signed-off-by: Dmitry Monakhov <dmonak...@openvz.org>
---
 fs/ext4/super.c | 53 ++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 36 insertions(+), 17 deletions(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 9273813..c0e7acd 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3056,7 +3056,6 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
        sb = elr->lr_super;
        ngroups = EXT4_SB(sb)->s_groups_count;
 
-       sb_start_write(sb);
        for (group = elr->lr_next_group; group < ngroups; group++) {
                gdp = ext4_get_group_desc(sb, group, NULL);
                if (!gdp) {
@@ -3083,8 +3082,6 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
                elr->lr_next_sched = jiffies + elr->lr_timeout;
                elr->lr_next_group = group + 1;
        }
-       sb_end_write(sb);
-
        return ret;
 }
 
@@ -3134,9 +3131,9 @@ static struct task_struct *ext4_lazyinit_task;
 static int ext4_lazyinit_thread(void *arg)
 {
        struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg;
-       struct list_head *pos, *n;
        struct ext4_li_request *elr;
        unsigned long next_wakeup, cur;
+       LIST_HEAD(request_list);
 
        BUG_ON(NULL == eli);
 
@@ -3149,21 +3146,43 @@ cont_thread:
                        mutex_unlock(&eli->li_list_mtx);
                        goto exit_thread;
                }
-
-               list_for_each_safe(pos, n, &eli->li_request_list) {
-                       elr = list_entry(pos, struct ext4_li_request,
-                                        lr_request);
-
-                       if (time_after_eq(jiffies, elr->lr_next_sched)) {
-                               if (ext4_run_li_request(elr) != 0) {
-                                       /* error, remove the lazy_init job */
-                                       ext4_remove_li_request(elr);
-                                       continue;
+               list_splice_init(&eli->li_request_list, &request_list);
+               while (!list_empty(&request_list)) {
+                       int err = 0;
+                       int progress = 0;
+
+                       elr = list_entry(request_list.next,
+                                        struct ext4_li_request, lr_request);
+                       list_move(request_list.next, &eli->li_request_list);
+                       if (time_before(jiffies, elr->lr_next_sched)) {
+                               if (time_before(elr->lr_next_sched, next_wakeup))
+                                       next_wakeup = elr->lr_next_sched;
+                               continue;
+                       }
+                       if (down_read_trylock(&elr->lr_super->s_umount)) {
+                               if (sb_start_write_trylock(elr->lr_super)) {
+                                       progress = 1;
+                                       /* We hold sb->s_umount, sb can not
+                                        * be removed from the list, it is
+                                        * now safe to drop li_list_mtx
+                                        */
+                                       mutex_unlock(&eli->li_list_mtx);
+                                       err = ext4_run_li_request(elr);
+                                       sb_end_write(elr->lr_super);
+                                       mutex_lock(&eli->li_list_mtx);
                                }
+                               up_read(&elr->lr_super->s_umount);
+                       }
+                       /* error, remove the lazy_init job */
+                       if (err) {
+                               ext4_remove_li_request(elr);
+                               continue;
+                       }
+                       if (!progress) {
+                               elr->lr_next_sched = jiffies +
+                                       (prandom_u32()
+                                        % (EXT4_DEF_LI_MAX_START_DELAY * HZ));
                        }
-
-                       if (time_before(elr->lr_next_sched, next_wakeup))
-                               next_wakeup = elr->lr_next_sched;
                }
                mutex_unlock(&eli->li_list_mtx);
 