The main idea is the following:
 * for background writeback we check all UBs for exceeding the dirty limit.
 * background work goes on if any UB has exceeded its dirty limit.
 * In that case, writeback will skip inodes that belong to a
      "within dirty-limits" UB.

writeback_inodes_wb() gains a new 'struct user_beancounter *ub' argument,
which is needed for targeted per-CT writeback. This will be used in the next
patch.

https://jira.sw.ru/browse/PSBM-33841

Signed-off-by: Andrey Ryabinin <aryabi...@virtuozzo.com>
---
 fs/fs-writeback.c           | 38 ++++++++++++++++++++++++++++++++------
 include/bc/io_acct.h        |  7 ++++++-
 include/linux/backing-dev.h |  2 ++
 kernel/bc/io_acct.c         | 42 ++++++++++++++++++++++++++++++++++++++++--
 4 files changed, 80 insertions(+), 9 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index ac8066b..ef4f963 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -42,6 +42,7 @@ struct wb_writeback_work {
        struct super_block *sb;
        unsigned long *older_than_this;
        enum writeback_sync_modes sync_mode;
+       unsigned int filter_ub:1;
        unsigned int tagged_writepages:1;
        unsigned int for_kupdate:1;
        unsigned int range_cyclic:1;
@@ -51,6 +52,7 @@ struct wb_writeback_work {
 
        struct list_head list;          /* pending work list */
        struct completion *done;        /* set if the caller waits */
+       struct user_beancounter *ub;
 };
 
 /*
@@ -724,6 +726,13 @@ static long writeback_sb_inodes(struct super_block *sb,
                        trace_writeback_sb_inodes_requeue(inode);
                        continue;
                }
+               if ((work->ub || work->filter_ub) &&
+                    ub_should_skip_writeback(work->ub, inode)) {
+                       spin_unlock(&inode->i_lock);
+                       requeue_io(inode, wb);
+                       continue;
+               }
+
                spin_unlock(&wb->list_lock);
 
                /*
@@ -809,14 +818,16 @@ static long __writeback_inodes_wb(struct bdi_writeback 
*wb,
        return wrote;
 }
 
-static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
-                               enum wb_reason reason)
+long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
+                       enum wb_reason reason, struct user_beancounter *ub)
 {
        struct wb_writeback_work work = {
                .nr_pages       = nr_pages,
                .sync_mode      = WB_SYNC_NONE,
                .range_cyclic   = 1,
                .reason         = reason,
+               .ub             = ub,
+               .filter_ub      = 0,
        };
 
        spin_lock(&wb->list_lock);
@@ -904,8 +915,14 @@ static long wb_writeback(struct bdi_writeback *wb,
                 * For background writeout, stop when we are below the
                 * background dirty threshold
                 */
-               if (work->for_background && !over_bground_thresh(wb->bdi))
-                       break;
+               if (work->for_background) {
+                       if (over_bground_thresh(wb->bdi))
+                               work->filter_ub = 0;
+                       else if (ub_over_bground_thresh())
+                               work->filter_ub = 1;
+                       else
+                               break;
+               }
 
                /*
                 * Kupdate and background works are special and we want to
@@ -996,7 +1013,8 @@ static unsigned long get_nr_dirty_pages(void)
 
 static long wb_check_background_flush(struct bdi_writeback *wb)
 {
-       if (over_bground_thresh(wb->bdi)) {
+       if (over_bground_thresh(wb->bdi) ||
+               ub_over_bground_thresh()) {
 
                struct wb_writeback_work work = {
                        .nr_pages       = LONG_MAX,
@@ -1004,6 +1022,8 @@ static long wb_check_background_flush(struct 
bdi_writeback *wb)
                        .for_background = 1,
                        .range_cyclic   = 1,
                        .reason         = WB_REASON_BACKGROUND,
+                       .filter_ub      = 0,
+                       .ub             = NULL,
                };
 
                return wb_writeback(wb, &work);
@@ -1038,6 +1058,8 @@ static long wb_check_old_data_flush(struct bdi_writeback 
*wb)
                        .for_kupdate    = 1,
                        .range_cyclic   = 1,
                        .reason         = WB_REASON_PERIODIC,
+                       .filter_ub      = 0,
+                       .ub             = NULL,
                };
 
                return wb_writeback(wb, &work);
@@ -1115,7 +1137,7 @@ void bdi_writeback_workfn(struct work_struct *work)
                 * enough for efficient IO.
                 */
                pages_written = writeback_inodes_wb(&bdi->wb, 1024,
-                                                   WB_REASON_FORKER_THREAD);
+                                               WB_REASON_FORKER_THREAD, NULL);
                trace_writeback_pages_written(pages_written);
        }
 
@@ -1432,6 +1454,8 @@ void writeback_inodes_sb_nr(struct super_block *sb,
                .done                   = &done,
                .nr_pages               = nr,
                .reason                 = reason,
+               .filter_ub              = 0,
+               .ub                     = NULL,
        };
 
        if (sb->s_bdi == &noop_backing_dev_info)
@@ -1514,6 +1538,8 @@ void sync_inodes_sb(struct super_block *sb)
                .done           = &done,
                .reason         = WB_REASON_SYNC,
                .for_sync       = 1,
+               .filter_ub      = 0,
+               .ub             = NULL,
        };
 
        /* Nothing to do? */
diff --git a/include/bc/io_acct.h b/include/bc/io_acct.h
index fa7afb1..e0af0bf 100644
--- a/include/bc/io_acct.h
+++ b/include/bc/io_acct.h
@@ -58,7 +58,7 @@ extern void ub_io_writeback_dec(struct address_space 
*mapping);
 
 extern int ub_dirty_limits(unsigned long *pbackground,
                           long *pdirty, struct user_beancounter *ub);
-
+extern bool ub_over_bground_thresh(void);
 extern bool ub_should_skip_writeback(struct user_beancounter *ub,
                                     struct inode *inode);
 
@@ -116,6 +116,11 @@ static inline struct user_beancounter *get_io_ub(void)
        return NULL;
 }
 
+static inline bool ub_over_bground_thresh(void)
+{
+       return false;
+}
+
 #endif /* UBC_IO_ACCT */
 
 #endif
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 859504b..b7668cf 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -130,6 +130,8 @@ int bdi_setup_and_register(struct backing_dev_info *, char 
*, unsigned int);
 void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
                        enum wb_reason reason);
 void bdi_start_background_writeback(struct backing_dev_info *bdi);
+long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
+                       enum wb_reason reason, struct user_beancounter *ub);
 void bdi_writeback_workfn(struct work_struct *work);
 int bdi_has_dirty_io(struct backing_dev_info *bdi);
 void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi);
diff --git a/kernel/bc/io_acct.c b/kernel/bc/io_acct.c
index f9778f8..e863ce7 100644
--- a/kernel/bc/io_acct.c
+++ b/kernel/bc/io_acct.c
@@ -126,12 +126,48 @@ void ub_io_writeback_dec(struct address_space *mapping)
        }
 }
 
+static bool __ub_over_bground_thresh(struct user_beancounter *ub)
+{
+       unsigned long background_thresh, dirty_thresh;
+       unsigned long ub_dirty, ub_writeback;
+
+       ub_dirty_limits(&background_thresh, &dirty_thresh, ub);
+
+       ub_dirty = ub_stat_get(ub, dirty_pages);
+       ub_writeback = ub_stat_get(ub, writeback_pages);
+
+       if (ub_dirty + ub_writeback >= background_thresh)
+               return true;
+
+       return false;
+}
+
+bool ub_over_bground_thresh(void)
+{
+       struct user_beancounter *ub;
+       bool ret = false;
+
+       rcu_read_lock();
+       for_each_beancounter(ub) {
+               if (ub == get_ub0())
+                       continue;
+               if (__ub_over_bground_thresh(ub)) {
+                       ret = true;
+                       break;
+               }
+       }
+       rcu_read_unlock();
+       return ret;
+}
+
 int ub_dirty_limits(unsigned long *pbackground,
                    long *pdirty, struct user_beancounter *ub)
 {
        int dirty_ratio;
        unsigned long available_memory;
 
+       *pdirty = *pbackground = LONG_MAX;
+
        dirty_ratio = ub_dirty_ratio;
        if (!dirty_ratio)
                return 0;
@@ -157,8 +193,10 @@ bool ub_should_skip_writeback(struct user_beancounter *ub, 
struct inode *inode)
 
        rcu_read_lock();
        dirtied_ub = rcu_dereference(inode->i_mapping->dirtied_ub);
-       ret = !dirtied_ub || (dirtied_ub != ub &&
-                       !test_bit(UB_DIRTY_EXCEEDED, &dirtied_ub->ub_flags));
+       if (ub)
+               ret = (ub != dirtied_ub);
+       else
+               ret = (dirtied_ub && !ub_over_bground_thresh());
        rcu_read_unlock();
 
        return ret;
-- 
2.4.10

_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to