From: JP Kobryn <[email protected]> There are situations where reclaim kicks in on a system with free memory. One possible cause is a NUMA imbalance scenario where one or more nodes are under pressure. It would help if we could easily identify such nodes.
Move the pgscan and pgsteal counters from vm_event_item to node_stat_item to provide per-node reclaim visibility. With these counters as node stats, the values are now displayed in the per-node section of /proc/zoneinfo, which allows for quick identification of the affected nodes. /proc/vmstat continues to report the same counters, aggregated across all nodes. But the ordering of these items within the readout changes as they move from the vm events section to the node stats section. Memcg accounting of these counters is preserved. The relocated counters remain visible in memory.stat alongside the existing aggregate pgscan and pgsteal counters. However, this change affects how the global counters are accumulated. Previously, the global event count update was gated on !cgroup_reclaim(), excluding memcg-based reclaim from /proc/vmstat. Now that mod_lruvec_state() is being used to update the counters, the global counters will include all reclaim. This is consistent with how pgdemote counters are already tracked. Finally, the virtio_balloon driver is updated to use global_node_page_state() to fetch the counters, as they are no longer accessible through the vm_events array. Signed-off-by: JP Kobryn <[email protected]> Suggested-by: Johannes Weiner <[email protected]> --- drivers/virtio/virtio_balloon.c | 8 ++--- include/linux/mmzone.h | 12 ++++++++ include/linux/vm_event_item.h | 12 -------- mm/memcontrol.c | 52 +++++++++++++++++++++++---------- mm/vmscan.c | 32 ++++++++------------ mm/vmstat.c | 24 +++++++-------- 6 files changed, 76 insertions(+), 64 deletions(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 4e549abe59ff..ab945532ceef 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -369,13 +369,13 @@ static inline unsigned int update_balloon_vm_stats(struct virtio_balloon *vb) update_stat(vb, idx++, VIRTIO_BALLOON_S_ALLOC_STALL, stall); update_stat(vb, idx++, VIRTIO_BALLOON_S_ASYNC_SCAN, - pages_to_bytes(events[PGSCAN_KSWAPD])); + pages_to_bytes(global_node_page_state(PGSCAN_KSWAPD))); update_stat(vb, idx++, VIRTIO_BALLOON_S_DIRECT_SCAN, - pages_to_bytes(events[PGSCAN_DIRECT])); + pages_to_bytes(global_node_page_state(PGSCAN_DIRECT))); update_stat(vb, idx++, VIRTIO_BALLOON_S_ASYNC_RECLAIM, - pages_to_bytes(events[PGSTEAL_KSWAPD])); + pages_to_bytes(global_node_page_state(PGSTEAL_KSWAPD))); update_stat(vb, idx++, VIRTIO_BALLOON_S_DIRECT_RECLAIM, - pages_to_bytes(events[PGSTEAL_DIRECT])); + pages_to_bytes(global_node_page_state(PGSTEAL_DIRECT))); #ifdef CONFIG_HUGETLB_PAGE update_stat(vb, idx++, VIRTIO_BALLOON_S_HTLB_PGALLOC, diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 3e51190a55e4..1aa9c7aec889 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -255,6 +255,18 @@ enum node_stat_item { PGDEMOTE_DIRECT, PGDEMOTE_KHUGEPAGED, PGDEMOTE_PROACTIVE, + PGSTEAL_KSWAPD, + PGSTEAL_DIRECT, + PGSTEAL_KHUGEPAGED, + PGSTEAL_PROACTIVE, + PGSTEAL_ANON, + PGSTEAL_FILE, + PGSCAN_KSWAPD, + PGSCAN_DIRECT, + PGSCAN_KHUGEPAGED, + PGSCAN_PROACTIVE, + PGSCAN_ANON, + PGSCAN_FILE, #ifdef CONFIG_HUGETLB_PAGE NR_HUGETLB, #endif diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 22a139f82d75..1fa3b3ad0ff9 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -40,19 +40,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, PGLAZYFREED, PGREFILL, PGREUSE, - PGSTEAL_KSWAPD, - PGSTEAL_DIRECT, - PGSTEAL_KHUGEPAGED, - PGSTEAL_PROACTIVE, - PGSCAN_KSWAPD, - PGSCAN_DIRECT, - PGSCAN_KHUGEPAGED, - PGSCAN_PROACTIVE, PGSCAN_DIRECT_THROTTLE, - PGSCAN_ANON, - PGSCAN_FILE, - PGSTEAL_ANON, - PGSTEAL_FILE, #ifdef CONFIG_NUMA PGSCAN_ZONE_RECLAIM_SUCCESS, PGSCAN_ZONE_RECLAIM_FAILED, diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 007413a53b45..e89e77457701 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -328,6 +328,18 @@ static const unsigned int memcg_node_stat_items[] = { PGDEMOTE_DIRECT, PGDEMOTE_KHUGEPAGED, PGDEMOTE_PROACTIVE, + PGSTEAL_KSWAPD, + PGSTEAL_DIRECT, + PGSTEAL_KHUGEPAGED, + PGSTEAL_PROACTIVE, + PGSTEAL_ANON, + PGSTEAL_FILE, + PGSCAN_KSWAPD, + PGSCAN_DIRECT, + PGSCAN_KHUGEPAGED, + PGSCAN_PROACTIVE, + PGSCAN_ANON, + PGSCAN_FILE, #ifdef CONFIG_HUGETLB_PAGE NR_HUGETLB, #endif @@ -441,14 +453,6 @@ static const unsigned int memcg_vm_event_stat[] = { #endif PSWPIN, PSWPOUT, - PGSCAN_KSWAPD, - PGSCAN_DIRECT, - PGSCAN_KHUGEPAGED, - PGSCAN_PROACTIVE, - PGSTEAL_KSWAPD, - PGSTEAL_DIRECT, - PGSTEAL_KHUGEPAGED, - PGSTEAL_PROACTIVE, PGFAULT, PGMAJFAULT, PGREFILL, @@ -1382,6 +1386,14 @@ static const struct memory_stat memory_stats[] = { { "pgdemote_direct", PGDEMOTE_DIRECT }, { "pgdemote_khugepaged", PGDEMOTE_KHUGEPAGED }, { "pgdemote_proactive", PGDEMOTE_PROACTIVE }, + { "pgsteal_kswapd", PGSTEAL_KSWAPD }, + { "pgsteal_direct", PGSTEAL_DIRECT }, + { "pgsteal_khugepaged", PGSTEAL_KHUGEPAGED }, + { "pgsteal_proactive", PGSTEAL_PROACTIVE }, + { "pgscan_kswapd", PGSCAN_KSWAPD }, + { "pgscan_direct", PGSCAN_DIRECT }, + { "pgscan_khugepaged", PGSCAN_KHUGEPAGED }, + { "pgscan_proactive", PGSCAN_PROACTIVE }, #ifdef CONFIG_NUMA_BALANCING { "pgpromote_success", PGPROMOTE_SUCCESS }, #endif @@ -1425,6 +1437,14 @@ static int memcg_page_state_output_unit(int item) case PGDEMOTE_DIRECT: case PGDEMOTE_KHUGEPAGED: case PGDEMOTE_PROACTIVE: + case PGSTEAL_KSWAPD: + case PGSTEAL_DIRECT: + case PGSTEAL_KHUGEPAGED: + case PGSTEAL_PROACTIVE: + case PGSCAN_KSWAPD: + case PGSCAN_DIRECT: + case PGSCAN_KHUGEPAGED: + case PGSCAN_PROACTIVE: #ifdef CONFIG_NUMA_BALANCING case PGPROMOTE_SUCCESS: #endif @@ -1496,15 +1516,15 @@ static void memcg_stat_format(struct mem_cgroup *memcg, struct seq_buf *s) /* Accumulated memory events */ seq_buf_printf(s, "pgscan %lu\n", - memcg_events(memcg, PGSCAN_KSWAPD) + - memcg_events(memcg, PGSCAN_DIRECT) + - memcg_events(memcg, PGSCAN_PROACTIVE) + - memcg_events(memcg, PGSCAN_KHUGEPAGED)); + memcg_page_state(memcg, PGSCAN_KSWAPD) + + memcg_page_state(memcg, PGSCAN_DIRECT) + + memcg_page_state(memcg, PGSCAN_PROACTIVE) + + memcg_page_state(memcg, PGSCAN_KHUGEPAGED)); seq_buf_printf(s, "pgsteal %lu\n", - memcg_events(memcg, PGSTEAL_KSWAPD) + - memcg_events(memcg, PGSTEAL_DIRECT) + - memcg_events(memcg, PGSTEAL_PROACTIVE) + - memcg_events(memcg, PGSTEAL_KHUGEPAGED)); + memcg_page_state(memcg, PGSTEAL_KSWAPD) + + memcg_page_state(memcg, PGSTEAL_DIRECT) + + memcg_page_state(memcg, PGSTEAL_PROACTIVE) + + memcg_page_state(memcg, PGSTEAL_KHUGEPAGED)); for (i = 0; i < ARRAY_SIZE(memcg_vm_event_stat); i++) { #ifdef CONFIG_MEMCG_V1 diff --git a/mm/vmscan.c b/mm/vmscan.c index 44e4fcd6463c..dd6d87340941 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1984,7 +1984,7 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan, unsigned long nr_taken; struct reclaim_stat stat; bool file = is_file_lru(lru); - enum vm_event_item item; + enum node_stat_item item; struct pglist_data *pgdat = lruvec_pgdat(lruvec); bool stalled = false; @@ -2010,10 +2010,8 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan, __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, nr_taken); item = PGSCAN_KSWAPD + reclaimer_offset(sc); - if (!cgroup_reclaim(sc)) - __count_vm_events(item, nr_scanned); - count_memcg_events(lruvec_memcg(lruvec), item, nr_scanned); - __count_vm_events(PGSCAN_ANON + file, nr_scanned); + mod_lruvec_state(lruvec, item, nr_scanned); + mod_lruvec_state(lruvec, PGSCAN_ANON + file, nr_scanned); spin_unlock_irq(&lruvec->lru_lock); @@ -2030,10 +2028,8 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan, stat.nr_demoted); __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken); item = PGSTEAL_KSWAPD + reclaimer_offset(sc); - if (!cgroup_reclaim(sc)) - __count_vm_events(item, nr_reclaimed); - count_memcg_events(lruvec_memcg(lruvec), item, nr_reclaimed); - __count_vm_events(PGSTEAL_ANON + file, nr_reclaimed); + mod_lruvec_state(lruvec, item, nr_reclaimed); + mod_lruvec_state(lruvec, PGSTEAL_ANON + file, nr_reclaimed); lru_note_cost_unlock_irq(lruvec, file, stat.nr_pageout, nr_scanned - nr_reclaimed); @@ -4542,7 +4538,7 @@ static int scan_folios(unsigned long nr_to_scan, struct lruvec *lruvec, { int i; int gen; - enum vm_event_item item; + enum node_stat_item item; int sorted = 0; int scanned = 0; int isolated = 0; @@ -4601,13 +4597,11 @@ static int scan_folios(unsigned long nr_to_scan, struct lruvec *lruvec, } item = PGSCAN_KSWAPD + reclaimer_offset(sc); - if (!cgroup_reclaim(sc)) { - __count_vm_events(item, isolated); + if (!cgroup_reclaim(sc)) __count_vm_events(PGREFILL, sorted); - } - count_memcg_events(memcg, item, isolated); + mod_lruvec_state(lruvec, item, isolated); count_memcg_events(memcg, PGREFILL, sorted); - __count_vm_events(PGSCAN_ANON + type, isolated); + mod_lruvec_state(lruvec, PGSCAN_ANON + type, isolated); trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, scan_batch, scanned, skipped, isolated, type ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON); @@ -4692,7 +4686,7 @@ static int evict_folios(unsigned long nr_to_scan, struct lruvec *lruvec, LIST_HEAD(clean); struct folio *folio; struct folio *next; - enum vm_event_item item; + enum node_stat_item item; struct reclaim_stat stat; struct lru_gen_mm_walk *walk; bool skip_retry = false; @@ -4756,10 +4750,8 @@ static int evict_folios(unsigned long nr_to_scan, struct lruvec *lruvec, stat.nr_demoted); item = PGSTEAL_KSWAPD + reclaimer_offset(sc); - if (!cgroup_reclaim(sc)) - __count_vm_events(item, reclaimed); - count_memcg_events(memcg, item, reclaimed); - __count_vm_events(PGSTEAL_ANON + type, reclaimed); + mod_lruvec_state(lruvec, item, reclaimed); + mod_lruvec_state(lruvec, PGSTEAL_ANON + type, reclaimed); spin_unlock_irq(&lruvec->lru_lock); diff --git a/mm/vmstat.c b/mm/vmstat.c index 99270713e0c1..d952c1e763e6 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1276,6 +1276,18 @@ const char * const vmstat_text[] = { [I(PGDEMOTE_DIRECT)] = "pgdemote_direct", [I(PGDEMOTE_KHUGEPAGED)] = "pgdemote_khugepaged", [I(PGDEMOTE_PROACTIVE)] = "pgdemote_proactive", + [I(PGSTEAL_KSWAPD)] = "pgsteal_kswapd", + [I(PGSTEAL_DIRECT)] = "pgsteal_direct", + [I(PGSTEAL_KHUGEPAGED)] = "pgsteal_khugepaged", + [I(PGSTEAL_PROACTIVE)] = "pgsteal_proactive", + [I(PGSTEAL_ANON)] = "pgsteal_anon", + [I(PGSTEAL_FILE)] = "pgsteal_file", + [I(PGSCAN_KSWAPD)] = "pgscan_kswapd", + [I(PGSCAN_DIRECT)] = "pgscan_direct", + [I(PGSCAN_KHUGEPAGED)] = "pgscan_khugepaged", + [I(PGSCAN_PROACTIVE)] = "pgscan_proactive", + [I(PGSCAN_ANON)] = "pgscan_anon", + [I(PGSCAN_FILE)] = "pgscan_file", #ifdef CONFIG_HUGETLB_PAGE [I(NR_HUGETLB)] = "nr_hugetlb", #endif @@ -1320,19 +1332,7 @@ const char * const vmstat_text[] = { [I(PGREFILL)] = "pgrefill", [I(PGREUSE)] = "pgreuse", - [I(PGSTEAL_KSWAPD)] = "pgsteal_kswapd", - [I(PGSTEAL_DIRECT)] = "pgsteal_direct", - [I(PGSTEAL_KHUGEPAGED)] = "pgsteal_khugepaged", - [I(PGSTEAL_PROACTIVE)] = "pgsteal_proactive", - [I(PGSCAN_KSWAPD)] = "pgscan_kswapd", - [I(PGSCAN_DIRECT)] = "pgscan_direct", - [I(PGSCAN_KHUGEPAGED)] = "pgscan_khugepaged", - [I(PGSCAN_PROACTIVE)] = "pgscan_proactive", [I(PGSCAN_DIRECT_THROTTLE)] = "pgscan_direct_throttle", - [I(PGSCAN_ANON)] = "pgscan_anon", - [I(PGSCAN_FILE)] = "pgscan_file", - [I(PGSTEAL_ANON)] = "pgsteal_anon", - [I(PGSTEAL_FILE)] = "pgsteal_file", #ifdef CONFIG_NUMA [I(PGSCAN_ZONE_RECLAIM_SUCCESS)] = "zone_reclaim_success", -- 2.47.3

