When demoting pages to a PMEM node, the target node may be under memory
pressure, and that memory pressure may cause migrate_pages() to fail.

If the failure is caused by memory pressure (i.e. migrate_pages() returns
-ENOMEM), tag the target node with PGDAT_CONTENDED.  The tag is cleared
once the target node is balanced again.

Before demoting, check whether the target node is tagged PGDAT_CONTENDED;
if it is, skip demotion.

Signed-off-by: Yang Shi <yang....@linux.alibaba.com>
---
 include/linux/mmzone.h |  3 +++
 mm/vmscan.c            | 28 ++++++++++++++++++++++++++++
 2 files changed, 31 insertions(+)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index fba7741..de534db 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -520,6 +520,9 @@ enum pgdat_flags {
                                         * many pages under writeback
                                         */
        PGDAT_RECLAIM_LOCKED,           /* prevents concurrent reclaim */
+       PGDAT_CONTENDED,                /* the node does not have enough
+                                        * free memory available
+                                        */
 };
 
 enum zone_flags {
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 80cd624..50cde53 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1048,6 +1048,9 @@ static void page_check_dirty_writeback(struct page *page,
 
 static inline bool is_demote_ok(int nid, struct scan_control *sc)
 {
+       int node;
+       nodemask_t used_mask;
+
        /* It is pointless to do demotion in memcg reclaim */
        if (!global_reclaim(sc))
                return false;
@@ -1060,6 +1063,13 @@ static inline bool is_demote_ok(int nid, struct 
scan_control *sc)
        if (!has_cpuless_node_online())
                return false;
 
+       /* Check if the demote target node is contended or not */
+       nodes_clear(used_mask);
+       node = find_next_best_node(nid, &used_mask, true);
+
+       if (test_bit(PGDAT_CONTENDED, &NODE_DATA(node)->flags))
+               return false;
+
        return true;
 }
 
@@ -1502,6 +1512,10 @@ static unsigned long shrink_page_list(struct list_head 
*page_list,
                nr_reclaimed += nr_succeeded;
 
                if (err) {
+                       if (err == -ENOMEM)
+                               set_bit(PGDAT_CONTENDED,
+                                       &NODE_DATA(target_nid)->flags);
+
                        putback_movable_pages(&demote_pages);
 
                        list_splice(&ret_pages, &demote_pages);
@@ -2596,6 +2610,19 @@ static void shrink_node_memcg(struct pglist_data *pgdat, 
struct mem_cgroup *memc
                 * scan target and the percentage scanning already complete
                 */
                lru = (lru == LRU_FILE) ? LRU_BASE : LRU_FILE;
+
+               /*
+                * shrink_page_list() may find that the demotion target node
+                * is contended; if so, it doesn't make sense to scan the
+                * anonymous LRU again.
+                *
+                * Also check whether swap is available, since demotion may
+                * happen on a swapless system.
+                */
+               if (!is_demote_ok(pgdat->node_id, sc) &&
+                   (!sc->may_swap || mem_cgroup_get_nr_swap_pages(memcg) <= 0))
+                       lru = LRU_FILE;
+
                nr_scanned = targets[lru] - nr[lru];
                nr[lru] = targets[lru] * (100 - percentage) / 100;
                nr[lru] -= min(nr[lru], nr_scanned);
@@ -3458,6 +3485,7 @@ static void clear_pgdat_congested(pg_data_t *pgdat)
        clear_bit(PGDAT_CONGESTED, &pgdat->flags);
        clear_bit(PGDAT_DIRTY, &pgdat->flags);
        clear_bit(PGDAT_WRITEBACK, &pgdat->flags);
+       clear_bit(PGDAT_CONTENDED, &pgdat->flags);
 }
 
 /*
-- 
1.8.3.1

Reply via email to