The commit is pushed to "branch-rh7-3.10.0-229.7.2.vz7.9.x-ovz" and will appear 
at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-229.7.2.vz7.9.24
------>
commit a9eb8cde445d30601507a2bb3b638611d1a93cd2
Author: Andrey Ryabinin <aryabi...@virtuozzo.com>
Date:   Fri Jan 22 13:35:13 2016 +0400

    ve/fs/writeback: per-CT fs writeback
    
    The main idea is as follows:
     * For background work we check all UBs for exceeding their dirty limit.
     * Background work goes on if any UB has exceeded its dirty limit.
     * In that case, writeback skips inodes that belong to a UB which is
       within its dirty limits.
    
    writeback_inodes_wb() gains a new 'struct user_beancounter *ub' argument,
    which is needed for targeted per-CT writeback. It will be used in the next
    patch.
    
    https://jira.sw.ru/browse/PSBM-33841
    
    Signed-off-by: Andrey Ryabinin <aryabi...@virtuozzo.com>
    Reviewed-by: Vladimir Davydov <vdavy...@virtuozzo.com>
---
 fs/fs-writeback.c           | 29 +++++++++++++++++++++++------
 include/bc/io_acct.h        |  7 ++++++-
 include/linux/backing-dev.h |  2 ++
 kernel/bc/io_acct.c         | 42 ++++++++++++++++++++++++++++++++++++++++--
 4 files changed, 71 insertions(+), 9 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index ac8066b..7b83367 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -42,6 +42,7 @@ struct wb_writeback_work {
        struct super_block *sb;
        unsigned long *older_than_this;
        enum writeback_sync_modes sync_mode;
+       unsigned int filter_ub:1;
        unsigned int tagged_writepages:1;
        unsigned int for_kupdate:1;
        unsigned int range_cyclic:1;
@@ -51,6 +52,7 @@ struct wb_writeback_work {
 
        struct list_head list;          /* pending work list */
        struct completion *done;        /* set if the caller waits */
+       struct user_beancounter *ub;
 };
 
 /*
@@ -724,6 +726,13 @@ static long writeback_sb_inodes(struct super_block *sb,
                        trace_writeback_sb_inodes_requeue(inode);
                        continue;
                }
+               if ((work->ub || work->filter_ub) &&
+                    ub_should_skip_writeback(work->ub, inode)) {
+                       spin_unlock(&inode->i_lock);
+                       requeue_io(inode, wb);
+                       continue;
+               }
+
                spin_unlock(&wb->list_lock);
 
                /*
@@ -809,14 +818,15 @@ static long __writeback_inodes_wb(struct bdi_writeback *wb,
        return wrote;
 }
 
-static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
-                               enum wb_reason reason)
+long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
+                       enum wb_reason reason, struct user_beancounter *ub)
 {
        struct wb_writeback_work work = {
                .nr_pages       = nr_pages,
                .sync_mode      = WB_SYNC_NONE,
                .range_cyclic   = 1,
                .reason         = reason,
+               .ub             = ub,
        };
 
        spin_lock(&wb->list_lock);
@@ -904,8 +914,14 @@ static long wb_writeback(struct bdi_writeback *wb,
                 * For background writeout, stop when we are below the
                 * background dirty threshold
                 */
-               if (work->for_background && !over_bground_thresh(wb->bdi))
-                       break;
+               if (work->for_background) {
+                       if (over_bground_thresh(wb->bdi))
+                               work->filter_ub = 0;
+                       else if (ub_over_bground_thresh())
+                               work->filter_ub = 1;
+                       else
+                               break;
+               }
 
                /*
                 * Kupdate and background works are special and we want to
@@ -996,7 +1012,8 @@ static unsigned long get_nr_dirty_pages(void)
 
 static long wb_check_background_flush(struct bdi_writeback *wb)
 {
-       if (over_bground_thresh(wb->bdi)) {
+       if (over_bground_thresh(wb->bdi) ||
+               ub_over_bground_thresh()) {
 
                struct wb_writeback_work work = {
                        .nr_pages       = LONG_MAX,
@@ -1115,7 +1132,7 @@ void bdi_writeback_workfn(struct work_struct *work)
                 * enough for efficient IO.
                 */
                pages_written = writeback_inodes_wb(&bdi->wb, 1024,
-                                                   WB_REASON_FORKER_THREAD);
+                                               WB_REASON_FORKER_THREAD, NULL);
                trace_writeback_pages_written(pages_written);
        }
 
diff --git a/include/bc/io_acct.h b/include/bc/io_acct.h
index fa7afb1..e0af0bf 100644
--- a/include/bc/io_acct.h
+++ b/include/bc/io_acct.h
@@ -58,7 +58,7 @@ extern void ub_io_writeback_dec(struct address_space *mapping);
 
 extern int ub_dirty_limits(unsigned long *pbackground,
                           long *pdirty, struct user_beancounter *ub);
-
+extern bool ub_over_bground_thresh(void);
 extern bool ub_should_skip_writeback(struct user_beancounter *ub,
                                     struct inode *inode);
 
@@ -116,6 +116,11 @@ static inline struct user_beancounter *get_io_ub(void)
        return NULL;
 }
 
+static inline bool ub_over_bground_thresh(void)
+{
+       return false;   /* stub variant: never reports an over-threshold UB */
+}
+
 #endif /* UBC_IO_ACCT */
 
 #endif
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 859504b..b7668cf 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -130,6 +130,8 @@ int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int);
 void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
                        enum wb_reason reason);
 void bdi_start_background_writeback(struct backing_dev_info *bdi);
+long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
+                       enum wb_reason reason, struct user_beancounter *ub);
 void bdi_writeback_workfn(struct work_struct *work);
 int bdi_has_dirty_io(struct backing_dev_info *bdi);
 void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi);
diff --git a/kernel/bc/io_acct.c b/kernel/bc/io_acct.c
index f9778f8..ce41829 100644
--- a/kernel/bc/io_acct.c
+++ b/kernel/bc/io_acct.c
@@ -126,12 +126,48 @@ void ub_io_writeback_dec(struct address_space *mapping)
        }
 }
 
+/* Is @ub's dirty + writeback page count at or above its background limit? */
+static bool __ub_over_bground_thresh(struct user_beancounter *ub)
+{
+       unsigned long background_thresh;
+       unsigned long ub_dirty, ub_writeback;
+       long dirty_thresh;      /* ub_dirty_limits() takes long *pdirty */
+
+       /* Only the background threshold is used; dirty_thresh is discarded. */
+       ub_dirty_limits(&background_thresh, &dirty_thresh, ub);
+
+       ub_dirty = ub_stat_get(ub, dirty_pages);
+       ub_writeback = ub_stat_get(ub, writeback_pages);
+
+       return ub_dirty + ub_writeback >= background_thresh;
+}
+
+bool ub_over_bground_thresh(void)
+{
+       struct user_beancounter *bc;
+       bool over = false;
+
+       /* One beancounter (other than get_ub0()) over threshold is enough. */
+       rcu_read_lock();
+       for_each_beancounter(bc) {
+               if (bc != get_ub0() && __ub_over_bground_thresh(bc)) {
+                       over = true;
+                       break;
+               }
+       }
+       rcu_read_unlock();
+
+       return over;
+}
+
 int ub_dirty_limits(unsigned long *pbackground,
                    long *pdirty, struct user_beancounter *ub)
 {
        int dirty_ratio;
        unsigned long available_memory;
 
+       *pdirty = *pbackground = LONG_MAX;
+
        dirty_ratio = ub_dirty_ratio;
        if (!dirty_ratio)
                return 0;
@@ -157,8 +193,10 @@ bool ub_should_skip_writeback(struct user_beancounter *ub, struct inode *inode)
 
        rcu_read_lock();
        dirtied_ub = rcu_dereference(inode->i_mapping->dirtied_ub);
-       ret = !dirtied_ub || (dirtied_ub != ub &&
-                       !test_bit(UB_DIRTY_EXCEEDED, &dirtied_ub->ub_flags));
+       if (ub)
+               ret = (ub != dirtied_ub);
+       else
+               ret = (dirtied_ub && !__ub_over_bground_thresh(dirtied_ub));
        rcu_read_unlock();
 
        return ret;
_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to