From: JP Kobryn <[email protected]>

There are situations where reclaim kicks in on a system with free memory.
One possible cause is a NUMA imbalance scenario where one or more nodes are
under pressure. It would help if we could easily identify such nodes.

Move the pgscan and pgsteal counters from vm_event_item to node_stat_item
to provide per-node reclaim visibility. With these counters as node stats,
the values are now displayed in the per-node section of /proc/zoneinfo,
which allows for quick identification of the affected nodes.

/proc/vmstat continues to report the same counters, aggregated across all
nodes. Note that their position within the readout changes, since they
move from the vm events section to the node stats section.

Memcg accounting of these counters is preserved. The relocated counters
remain visible in memory.stat alongside the existing aggregate pgscan and
pgsteal counters.

However, this change affects how the global counters are accumulated.
Previously, the global update of the per-reclaimer pgscan_* and pgsteal_*
counters was gated on !cgroup_reclaim(), excluding memcg-based reclaim from
/proc/vmstat; only the pg{scan,steal}_{anon,file} counters were updated
unconditionally. Now that mod_lruvec_state() is being used to update all of
these counters, the global counters will include all reclaim, memcg-based
or not. This is consistent with how pgdemote counters are already tracked.

Finally, the virtio_balloon driver is updated to use
global_node_page_state() to fetch the counters, as they are no longer
accessible through the vm_events array.

Signed-off-by: JP Kobryn <[email protected]>
Suggested-by: Johannes Weiner <[email protected]>
---
 drivers/virtio/virtio_balloon.c |  8 ++---
 include/linux/mmzone.h          | 12 ++++++++
 include/linux/vm_event_item.h   | 12 --------
 mm/memcontrol.c                 | 52 +++++++++++++++++++++++----------
 mm/vmscan.c                     | 32 ++++++++------------
 mm/vmstat.c                     | 24 +++++++--------
 6 files changed, 76 insertions(+), 64 deletions(-)

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 4e549abe59ff..ab945532ceef 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -369,13 +369,13 @@ static inline unsigned int update_balloon_vm_stats(struct virtio_balloon *vb)
        update_stat(vb, idx++, VIRTIO_BALLOON_S_ALLOC_STALL, stall);
 
        update_stat(vb, idx++, VIRTIO_BALLOON_S_ASYNC_SCAN,
-                   pages_to_bytes(events[PGSCAN_KSWAPD]));
+                   pages_to_bytes(global_node_page_state(PGSCAN_KSWAPD)));
        update_stat(vb, idx++, VIRTIO_BALLOON_S_DIRECT_SCAN,
-                   pages_to_bytes(events[PGSCAN_DIRECT]));
+                   pages_to_bytes(global_node_page_state(PGSCAN_DIRECT)));
        update_stat(vb, idx++, VIRTIO_BALLOON_S_ASYNC_RECLAIM,
-                   pages_to_bytes(events[PGSTEAL_KSWAPD]));
+                   pages_to_bytes(global_node_page_state(PGSTEAL_KSWAPD)));
        update_stat(vb, idx++, VIRTIO_BALLOON_S_DIRECT_RECLAIM,
-                   pages_to_bytes(events[PGSTEAL_DIRECT]));
+                   pages_to_bytes(global_node_page_state(PGSTEAL_DIRECT)));
 
 #ifdef CONFIG_HUGETLB_PAGE
        update_stat(vb, idx++, VIRTIO_BALLOON_S_HTLB_PGALLOC,
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 3e51190a55e4..1aa9c7aec889 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -255,6 +255,18 @@ enum node_stat_item {
        PGDEMOTE_DIRECT,
        PGDEMOTE_KHUGEPAGED,
        PGDEMOTE_PROACTIVE,
+       PGSTEAL_KSWAPD,
+       PGSTEAL_DIRECT,
+       PGSTEAL_KHUGEPAGED,
+       PGSTEAL_PROACTIVE,
+       PGSTEAL_ANON,
+       PGSTEAL_FILE,
+       PGSCAN_KSWAPD,
+       PGSCAN_DIRECT,
+       PGSCAN_KHUGEPAGED,
+       PGSCAN_PROACTIVE,
+       PGSCAN_ANON,
+       PGSCAN_FILE,
 #ifdef CONFIG_HUGETLB_PAGE
        NR_HUGETLB,
 #endif
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 22a139f82d75..1fa3b3ad0ff9 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -40,19 +40,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
                PGLAZYFREED,
                PGREFILL,
                PGREUSE,
-               PGSTEAL_KSWAPD,
-               PGSTEAL_DIRECT,
-               PGSTEAL_KHUGEPAGED,
-               PGSTEAL_PROACTIVE,
-               PGSCAN_KSWAPD,
-               PGSCAN_DIRECT,
-               PGSCAN_KHUGEPAGED,
-               PGSCAN_PROACTIVE,
                PGSCAN_DIRECT_THROTTLE,
-               PGSCAN_ANON,
-               PGSCAN_FILE,
-               PGSTEAL_ANON,
-               PGSTEAL_FILE,
 #ifdef CONFIG_NUMA
                PGSCAN_ZONE_RECLAIM_SUCCESS,
                PGSCAN_ZONE_RECLAIM_FAILED,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 007413a53b45..e89e77457701 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -328,6 +328,18 @@ static const unsigned int memcg_node_stat_items[] = {
        PGDEMOTE_DIRECT,
        PGDEMOTE_KHUGEPAGED,
        PGDEMOTE_PROACTIVE,
+       PGSTEAL_KSWAPD,
+       PGSTEAL_DIRECT,
+       PGSTEAL_KHUGEPAGED,
+       PGSTEAL_PROACTIVE,
+       PGSTEAL_ANON,
+       PGSTEAL_FILE,
+       PGSCAN_KSWAPD,
+       PGSCAN_DIRECT,
+       PGSCAN_KHUGEPAGED,
+       PGSCAN_PROACTIVE,
+       PGSCAN_ANON,
+       PGSCAN_FILE,
 #ifdef CONFIG_HUGETLB_PAGE
        NR_HUGETLB,
 #endif
@@ -441,14 +453,6 @@ static const unsigned int memcg_vm_event_stat[] = {
 #endif
        PSWPIN,
        PSWPOUT,
-       PGSCAN_KSWAPD,
-       PGSCAN_DIRECT,
-       PGSCAN_KHUGEPAGED,
-       PGSCAN_PROACTIVE,
-       PGSTEAL_KSWAPD,
-       PGSTEAL_DIRECT,
-       PGSTEAL_KHUGEPAGED,
-       PGSTEAL_PROACTIVE,
        PGFAULT,
        PGMAJFAULT,
        PGREFILL,
@@ -1382,6 +1386,14 @@ static const struct memory_stat memory_stats[] = {
        { "pgdemote_direct",            PGDEMOTE_DIRECT         },
        { "pgdemote_khugepaged",        PGDEMOTE_KHUGEPAGED     },
        { "pgdemote_proactive",         PGDEMOTE_PROACTIVE      },
+       { "pgsteal_kswapd",             PGSTEAL_KSWAPD          },
+       { "pgsteal_direct",             PGSTEAL_DIRECT          },
+       { "pgsteal_khugepaged",         PGSTEAL_KHUGEPAGED      },
+       { "pgsteal_proactive",          PGSTEAL_PROACTIVE       },
+       { "pgscan_kswapd",              PGSCAN_KSWAPD           },
+       { "pgscan_direct",              PGSCAN_DIRECT           },
+       { "pgscan_khugepaged",          PGSCAN_KHUGEPAGED       },
+       { "pgscan_proactive",           PGSCAN_PROACTIVE        },
 #ifdef CONFIG_NUMA_BALANCING
        { "pgpromote_success",          PGPROMOTE_SUCCESS       },
 #endif
@@ -1425,6 +1437,14 @@ static int memcg_page_state_output_unit(int item)
        case PGDEMOTE_DIRECT:
        case PGDEMOTE_KHUGEPAGED:
        case PGDEMOTE_PROACTIVE:
+       case PGSTEAL_KSWAPD:
+       case PGSTEAL_DIRECT:
+       case PGSTEAL_KHUGEPAGED:
+       case PGSTEAL_PROACTIVE:
+       case PGSCAN_KSWAPD:
+       case PGSCAN_DIRECT:
+       case PGSCAN_KHUGEPAGED:
+       case PGSCAN_PROACTIVE:
 #ifdef CONFIG_NUMA_BALANCING
        case PGPROMOTE_SUCCESS:
 #endif
@@ -1496,15 +1516,15 @@ static void memcg_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
 
        /* Accumulated memory events */
        seq_buf_printf(s, "pgscan %lu\n",
-                      memcg_events(memcg, PGSCAN_KSWAPD) +
-                      memcg_events(memcg, PGSCAN_DIRECT) +
-                      memcg_events(memcg, PGSCAN_PROACTIVE) +
-                      memcg_events(memcg, PGSCAN_KHUGEPAGED));
+                      memcg_page_state(memcg, PGSCAN_KSWAPD) +
+                      memcg_page_state(memcg, PGSCAN_DIRECT) +
+                      memcg_page_state(memcg, PGSCAN_PROACTIVE) +
+                      memcg_page_state(memcg, PGSCAN_KHUGEPAGED));
        seq_buf_printf(s, "pgsteal %lu\n",
-                      memcg_events(memcg, PGSTEAL_KSWAPD) +
-                      memcg_events(memcg, PGSTEAL_DIRECT) +
-                      memcg_events(memcg, PGSTEAL_PROACTIVE) +
-                      memcg_events(memcg, PGSTEAL_KHUGEPAGED));
+                      memcg_page_state(memcg, PGSTEAL_KSWAPD) +
+                      memcg_page_state(memcg, PGSTEAL_DIRECT) +
+                      memcg_page_state(memcg, PGSTEAL_PROACTIVE) +
+                      memcg_page_state(memcg, PGSTEAL_KHUGEPAGED));
 
        for (i = 0; i < ARRAY_SIZE(memcg_vm_event_stat); i++) {
 #ifdef CONFIG_MEMCG_V1
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 44e4fcd6463c..dd6d87340941 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1984,7 +1984,7 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan,
        unsigned long nr_taken;
        struct reclaim_stat stat;
        bool file = is_file_lru(lru);
-       enum vm_event_item item;
+       enum node_stat_item item;
        struct pglist_data *pgdat = lruvec_pgdat(lruvec);
        bool stalled = false;
 
@@ -2010,10 +2010,8 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan,
 
        __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, nr_taken);
        item = PGSCAN_KSWAPD + reclaimer_offset(sc);
-       if (!cgroup_reclaim(sc))
-               __count_vm_events(item, nr_scanned);
-       count_memcg_events(lruvec_memcg(lruvec), item, nr_scanned);
-       __count_vm_events(PGSCAN_ANON + file, nr_scanned);
+       mod_lruvec_state(lruvec, item, nr_scanned);
+       mod_lruvec_state(lruvec, PGSCAN_ANON + file, nr_scanned);
 
        spin_unlock_irq(&lruvec->lru_lock);
 
@@ -2030,10 +2028,8 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan,
                                        stat.nr_demoted);
        __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken);
        item = PGSTEAL_KSWAPD + reclaimer_offset(sc);
-       if (!cgroup_reclaim(sc))
-               __count_vm_events(item, nr_reclaimed);
-       count_memcg_events(lruvec_memcg(lruvec), item, nr_reclaimed);
-       __count_vm_events(PGSTEAL_ANON + file, nr_reclaimed);
+       mod_lruvec_state(lruvec, item, nr_reclaimed);
+       mod_lruvec_state(lruvec, PGSTEAL_ANON + file, nr_reclaimed);
 
        lru_note_cost_unlock_irq(lruvec, file, stat.nr_pageout,
                                        nr_scanned - nr_reclaimed);
@@ -4542,7 +4538,7 @@ static int scan_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
 {
        int i;
        int gen;
-       enum vm_event_item item;
+       enum node_stat_item item;
        int sorted = 0;
        int scanned = 0;
        int isolated = 0;
@@ -4601,13 +4597,11 @@ static int scan_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
        }
 
        item = PGSCAN_KSWAPD + reclaimer_offset(sc);
-       if (!cgroup_reclaim(sc)) {
-               __count_vm_events(item, isolated);
+       if (!cgroup_reclaim(sc))
                __count_vm_events(PGREFILL, sorted);
-       }
-       count_memcg_events(memcg, item, isolated);
+       mod_lruvec_state(lruvec, item, isolated);
        count_memcg_events(memcg, PGREFILL, sorted);
-       __count_vm_events(PGSCAN_ANON + type, isolated);
+       mod_lruvec_state(lruvec, PGSCAN_ANON + type, isolated);
        trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, scan_batch,
                                scanned, skipped, isolated,
                                type ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON);
@@ -4692,7 +4686,7 @@ static int evict_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
        LIST_HEAD(clean);
        struct folio *folio;
        struct folio *next;
-       enum vm_event_item item;
+       enum node_stat_item item;
        struct reclaim_stat stat;
        struct lru_gen_mm_walk *walk;
        bool skip_retry = false;
@@ -4756,10 +4750,8 @@ static int evict_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
                                        stat.nr_demoted);
 
        item = PGSTEAL_KSWAPD + reclaimer_offset(sc);
-       if (!cgroup_reclaim(sc))
-               __count_vm_events(item, reclaimed);
-       count_memcg_events(memcg, item, reclaimed);
-       __count_vm_events(PGSTEAL_ANON + type, reclaimed);
+       mod_lruvec_state(lruvec, item, reclaimed);
+       mod_lruvec_state(lruvec, PGSTEAL_ANON + type, reclaimed);
 
        spin_unlock_irq(&lruvec->lru_lock);
 
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 99270713e0c1..d952c1e763e6 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1276,6 +1276,18 @@ const char * const vmstat_text[] = {
        [I(PGDEMOTE_DIRECT)]                    = "pgdemote_direct",
        [I(PGDEMOTE_KHUGEPAGED)]                = "pgdemote_khugepaged",
        [I(PGDEMOTE_PROACTIVE)]                 = "pgdemote_proactive",
+       [I(PGSTEAL_KSWAPD)]                     = "pgsteal_kswapd",
+       [I(PGSTEAL_DIRECT)]                     = "pgsteal_direct",
+       [I(PGSTEAL_KHUGEPAGED)]                 = "pgsteal_khugepaged",
+       [I(PGSTEAL_PROACTIVE)]                  = "pgsteal_proactive",
+       [I(PGSTEAL_ANON)]                       = "pgsteal_anon",
+       [I(PGSTEAL_FILE)]                       = "pgsteal_file",
+       [I(PGSCAN_KSWAPD)]                      = "pgscan_kswapd",
+       [I(PGSCAN_DIRECT)]                      = "pgscan_direct",
+       [I(PGSCAN_KHUGEPAGED)]                  = "pgscan_khugepaged",
+       [I(PGSCAN_PROACTIVE)]                   = "pgscan_proactive",
+       [I(PGSCAN_ANON)]                        = "pgscan_anon",
+       [I(PGSCAN_FILE)]                        = "pgscan_file",
 #ifdef CONFIG_HUGETLB_PAGE
        [I(NR_HUGETLB)]                         = "nr_hugetlb",
 #endif
@@ -1320,19 +1332,7 @@ const char * const vmstat_text[] = {
 
        [I(PGREFILL)]                           = "pgrefill",
        [I(PGREUSE)]                            = "pgreuse",
-       [I(PGSTEAL_KSWAPD)]                     = "pgsteal_kswapd",
-       [I(PGSTEAL_DIRECT)]                     = "pgsteal_direct",
-       [I(PGSTEAL_KHUGEPAGED)]                 = "pgsteal_khugepaged",
-       [I(PGSTEAL_PROACTIVE)]                  = "pgsteal_proactive",
-       [I(PGSCAN_KSWAPD)]                      = "pgscan_kswapd",
-       [I(PGSCAN_DIRECT)]                      = "pgscan_direct",
-       [I(PGSCAN_KHUGEPAGED)]                  = "pgscan_khugepaged",
-       [I(PGSCAN_PROACTIVE)]                   = "pgscan_proactive",
        [I(PGSCAN_DIRECT_THROTTLE)]             = "pgscan_direct_throttle",
-       [I(PGSCAN_ANON)]                        = "pgscan_anon",
-       [I(PGSCAN_FILE)]                        = "pgscan_file",
-       [I(PGSTEAL_ANON)]                       = "pgsteal_anon",
-       [I(PGSTEAL_FILE)]                       = "pgsteal_file",
 
 #ifdef CONFIG_NUMA
        [I(PGSCAN_ZONE_RECLAIM_SUCCESS)]        = "zone_reclaim_success",
-- 
2.47.3


Reply via email to