On Wed 11-02-26 20:51:09, JP Kobryn wrote: > It would be useful to narrow down reclaim to specific nodes. > > Provide per-node reclaim visibility by changing the pgscan and pgsteal > stats from global vm_event_item's to node_stat_item's. Note this change has > the side effect of now tracking these stats on a per-memcg basis.
The changelog could have been clearer about the actual changes, as they are not obvious to untrained eyes. The most important parts are that /proc/vmstat will preserve the reclaim stats with a slightly different counter ordering (shouldn't break userspace much^W), per-node stats will now be newly displayed in /proc/zoneinfo - this is presumably the primary motivation, i.e. to get better insight into per-node reclaim activity - and memcg stats will now show their share of the global memory reclaim. Have I missed anything? > Signed-off-by: JP Kobryn <[email protected]> > Suggested-by: Johannes Weiner <[email protected]> > --- > drivers/virtio/virtio_balloon.c | 8 ++++---- > include/linux/mmzone.h | 12 +++++++++++ > include/linux/vm_event_item.h | 12 ----------- > mm/memcontrol.c | 36 ++++++++++++++++++--------------- > mm/vmscan.c | 32 +++++++++++------------------ > mm/vmstat.c | 24 +++++++++++----------- > 6 files changed, 60 insertions(+), 64 deletions(-) > > diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c > index 74fe59f5a78c..1341d9d1a2a1 100644 > --- a/drivers/virtio/virtio_balloon.c > +++ b/drivers/virtio/virtio_balloon.c > @@ -374,13 +374,13 @@ static inline unsigned int > update_balloon_vm_stats(struct virtio_balloon *vb) > update_stat(vb, idx++, VIRTIO_BALLOON_S_ALLOC_STALL, stall); > > update_stat(vb, idx++, VIRTIO_BALLOON_S_ASYNC_SCAN, > - pages_to_bytes(events[PGSCAN_KSWAPD])); > + pages_to_bytes(global_node_page_state(PGSCAN_KSWAPD))); > update_stat(vb, idx++, VIRTIO_BALLOON_S_DIRECT_SCAN, > - pages_to_bytes(events[PGSCAN_DIRECT])); > + pages_to_bytes(global_node_page_state(PGSCAN_DIRECT))); > update_stat(vb, idx++, VIRTIO_BALLOON_S_ASYNC_RECLAIM, > - pages_to_bytes(events[PGSTEAL_KSWAPD])); > + pages_to_bytes(global_node_page_state(PGSTEAL_KSWAPD))); > update_stat(vb, idx++, VIRTIO_BALLOON_S_DIRECT_RECLAIM, > - pages_to_bytes(events[PGSTEAL_DIRECT])); > + pages_to_bytes(global_node_page_state(PGSTEAL_DIRECT))); > > #ifdef 
CONFIG_HUGETLB_PAGE > update_stat(vb, idx++, VIRTIO_BALLOON_S_HTLB_PGALLOC, > diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h > index 762609d5f0af..fc39c107a4b5 100644 > --- a/include/linux/mmzone.h > +++ b/include/linux/mmzone.h > @@ -255,6 +255,18 @@ enum node_stat_item { > PGDEMOTE_DIRECT, > PGDEMOTE_KHUGEPAGED, > PGDEMOTE_PROACTIVE, > + PGSTEAL_KSWAPD, > + PGSTEAL_DIRECT, > + PGSTEAL_KHUGEPAGED, > + PGSTEAL_PROACTIVE, > + PGSTEAL_ANON, > + PGSTEAL_FILE, > + PGSCAN_KSWAPD, > + PGSCAN_DIRECT, > + PGSCAN_KHUGEPAGED, > + PGSCAN_PROACTIVE, > + PGSCAN_ANON, > + PGSCAN_FILE, > #ifdef CONFIG_NUMA > PGALLOC_MPOL_DEFAULT, > PGALLOC_MPOL_PREFERRED, > diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h > index 92f80b4d69a6..6f1787680658 100644 > --- a/include/linux/vm_event_item.h > +++ b/include/linux/vm_event_item.h > @@ -40,19 +40,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, > PGLAZYFREED, > PGREFILL, > PGREUSE, > - PGSTEAL_KSWAPD, > - PGSTEAL_DIRECT, > - PGSTEAL_KHUGEPAGED, > - PGSTEAL_PROACTIVE, > - PGSCAN_KSWAPD, > - PGSCAN_DIRECT, > - PGSCAN_KHUGEPAGED, > - PGSCAN_PROACTIVE, > PGSCAN_DIRECT_THROTTLE, > - PGSCAN_ANON, > - PGSCAN_FILE, > - PGSTEAL_ANON, > - PGSTEAL_FILE, > #ifdef CONFIG_NUMA > PGSCAN_ZONE_RECLAIM_SUCCESS, > PGSCAN_ZONE_RECLAIM_FAILED, > diff --git a/mm/memcontrol.c b/mm/memcontrol.c > index 86f43b7e5f71..bde0b6536be6 100644 > --- a/mm/memcontrol.c > +++ b/mm/memcontrol.c > @@ -328,6 +328,18 @@ static const unsigned int memcg_node_stat_items[] = { > PGDEMOTE_DIRECT, > PGDEMOTE_KHUGEPAGED, > PGDEMOTE_PROACTIVE, > + PGSTEAL_KSWAPD, > + PGSTEAL_DIRECT, > + PGSTEAL_KHUGEPAGED, > + PGSTEAL_PROACTIVE, > + PGSTEAL_ANON, > + PGSTEAL_FILE, > + PGSCAN_KSWAPD, > + PGSCAN_DIRECT, > + PGSCAN_KHUGEPAGED, > + PGSCAN_PROACTIVE, > + PGSCAN_ANON, > + PGSCAN_FILE, > #ifdef CONFIG_HUGETLB_PAGE > NR_HUGETLB, > #endif > @@ -441,14 +453,6 @@ static const unsigned int memcg_vm_event_stat[] = { > #endif > PSWPIN, > PSWPOUT, > 
- PGSCAN_KSWAPD, > - PGSCAN_DIRECT, > - PGSCAN_KHUGEPAGED, > - PGSCAN_PROACTIVE, > - PGSTEAL_KSWAPD, > - PGSTEAL_DIRECT, > - PGSTEAL_KHUGEPAGED, > - PGSTEAL_PROACTIVE, > PGFAULT, > PGMAJFAULT, > PGREFILL, > @@ -1496,15 +1500,15 @@ static void memcg_stat_format(struct mem_cgroup > *memcg, struct seq_buf *s) > > /* Accumulated memory events */ > seq_buf_printf(s, "pgscan %lu\n", > - memcg_events(memcg, PGSCAN_KSWAPD) + > - memcg_events(memcg, PGSCAN_DIRECT) + > - memcg_events(memcg, PGSCAN_PROACTIVE) + > - memcg_events(memcg, PGSCAN_KHUGEPAGED)); > + memcg_page_state(memcg, PGSCAN_KSWAPD) + > + memcg_page_state(memcg, PGSCAN_DIRECT) + > + memcg_page_state(memcg, PGSCAN_PROACTIVE) + > + memcg_page_state(memcg, PGSCAN_KHUGEPAGED)); > seq_buf_printf(s, "pgsteal %lu\n", > - memcg_events(memcg, PGSTEAL_KSWAPD) + > - memcg_events(memcg, PGSTEAL_DIRECT) + > - memcg_events(memcg, PGSTEAL_PROACTIVE) + > - memcg_events(memcg, PGSTEAL_KHUGEPAGED)); > + memcg_page_state(memcg, PGSTEAL_KSWAPD) + > + memcg_page_state(memcg, PGSTEAL_DIRECT) + > + memcg_page_state(memcg, PGSTEAL_PROACTIVE) + > + memcg_page_state(memcg, PGSTEAL_KHUGEPAGED)); > > for (i = 0; i < ARRAY_SIZE(memcg_vm_event_stat); i++) { > #ifdef CONFIG_MEMCG_V1 > diff --git a/mm/vmscan.c b/mm/vmscan.c > index 614ccf39fe3f..16a0f21e3ea1 100644 > --- a/mm/vmscan.c > +++ b/mm/vmscan.c > @@ -1977,7 +1977,7 @@ static unsigned long shrink_inactive_list(unsigned long > nr_to_scan, > unsigned long nr_taken; > struct reclaim_stat stat; > bool file = is_file_lru(lru); > - enum vm_event_item item; > + enum node_stat_item item; > struct pglist_data *pgdat = lruvec_pgdat(lruvec); > bool stalled = false; > > @@ -2003,10 +2003,8 @@ static unsigned long shrink_inactive_list(unsigned > long nr_to_scan, > > __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, nr_taken); > item = PGSCAN_KSWAPD + reclaimer_offset(sc); > - if (!cgroup_reclaim(sc)) > - __count_vm_events(item, nr_scanned); > - count_memcg_events(lruvec_memcg(lruvec), item, 
nr_scanned); > - __count_vm_events(PGSCAN_ANON + file, nr_scanned); > + mod_lruvec_state(lruvec, item, nr_scanned); > + mod_lruvec_state(lruvec, PGSCAN_ANON + file, nr_scanned); > > spin_unlock_irq(&lruvec->lru_lock); > > @@ -2023,10 +2021,8 @@ static unsigned long shrink_inactive_list(unsigned > long nr_to_scan, > stat.nr_demoted); > __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken); > item = PGSTEAL_KSWAPD + reclaimer_offset(sc); > - if (!cgroup_reclaim(sc)) > - __count_vm_events(item, nr_reclaimed); > - count_memcg_events(lruvec_memcg(lruvec), item, nr_reclaimed); > - __count_vm_events(PGSTEAL_ANON + file, nr_reclaimed); > + mod_lruvec_state(lruvec, item, nr_reclaimed); > + mod_lruvec_state(lruvec, PGSTEAL_ANON + file, nr_reclaimed); > > lru_note_cost_unlock_irq(lruvec, file, stat.nr_pageout, > nr_scanned - nr_reclaimed); > @@ -4536,7 +4532,7 @@ static int scan_folios(unsigned long nr_to_scan, struct > lruvec *lruvec, > { > int i; > int gen; > - enum vm_event_item item; > + enum node_stat_item item; > int sorted = 0; > int scanned = 0; > int isolated = 0; > @@ -4595,13 +4591,11 @@ static int scan_folios(unsigned long nr_to_scan, > struct lruvec *lruvec, > } > > item = PGSCAN_KSWAPD + reclaimer_offset(sc); > - if (!cgroup_reclaim(sc)) { > - __count_vm_events(item, isolated); > + if (!cgroup_reclaim(sc)) > __count_vm_events(PGREFILL, sorted); > - } > - count_memcg_events(memcg, item, isolated); > + mod_lruvec_state(lruvec, item, isolated); > count_memcg_events(memcg, PGREFILL, sorted); > - __count_vm_events(PGSCAN_ANON + type, isolated); > + mod_lruvec_state(lruvec, PGSCAN_ANON + type, isolated); > trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, scan_batch, > scanned, skipped, isolated, > type ? 
LRU_INACTIVE_FILE : LRU_INACTIVE_ANON); > @@ -4686,7 +4680,7 @@ static int evict_folios(unsigned long nr_to_scan, > struct lruvec *lruvec, > LIST_HEAD(clean); > struct folio *folio; > struct folio *next; > - enum vm_event_item item; > + enum node_stat_item item; > struct reclaim_stat stat; > struct lru_gen_mm_walk *walk; > bool skip_retry = false; > @@ -4750,10 +4744,8 @@ static int evict_folios(unsigned long nr_to_scan, > struct lruvec *lruvec, > stat.nr_demoted); > > item = PGSTEAL_KSWAPD + reclaimer_offset(sc); > - if (!cgroup_reclaim(sc)) > - __count_vm_events(item, reclaimed); > - count_memcg_events(memcg, item, reclaimed); > - __count_vm_events(PGSTEAL_ANON + type, reclaimed); > + mod_lruvec_state(lruvec, item, reclaimed); > + mod_lruvec_state(lruvec, PGSTEAL_ANON + type, reclaimed); > > spin_unlock_irq(&lruvec->lru_lock); > > diff --git a/mm/vmstat.c b/mm/vmstat.c > index 74e0ddde1e93..e4b259989d58 100644 > --- a/mm/vmstat.c > +++ b/mm/vmstat.c > @@ -1291,6 +1291,18 @@ const char * const vmstat_text[] = { > [I(PGDEMOTE_DIRECT)] = "pgdemote_direct", > [I(PGDEMOTE_KHUGEPAGED)] = "pgdemote_khugepaged", > [I(PGDEMOTE_PROACTIVE)] = "pgdemote_proactive", > + [I(PGSTEAL_KSWAPD)] = "pgsteal_kswapd", > + [I(PGSTEAL_DIRECT)] = "pgsteal_direct", > + [I(PGSTEAL_KHUGEPAGED)] = "pgsteal_khugepaged", > + [I(PGSTEAL_PROACTIVE)] = "pgsteal_proactive", > + [I(PGSTEAL_ANON)] = "pgsteal_anon", > + [I(PGSTEAL_FILE)] = "pgsteal_file", > + [I(PGSCAN_KSWAPD)] = "pgscan_kswapd", > + [I(PGSCAN_DIRECT)] = "pgscan_direct", > + [I(PGSCAN_KHUGEPAGED)] = "pgscan_khugepaged", > + [I(PGSCAN_PROACTIVE)] = "pgscan_proactive", > + [I(PGSCAN_ANON)] = "pgscan_anon", > + [I(PGSCAN_FILE)] = "pgscan_file", > #ifdef CONFIG_NUMA > [I(PGALLOC_MPOL_DEFAULT)] = "pgalloc_mpol_default", > [I(PGALLOC_MPOL_PREFERRED)] = "pgalloc_mpol_preferred", > @@ -1344,19 +1356,7 @@ const char * const vmstat_text[] = { > > [I(PGREFILL)] = "pgrefill", > [I(PGREUSE)] = "pgreuse", > - [I(PGSTEAL_KSWAPD)] = 
"pgsteal_kswapd", > - [I(PGSTEAL_DIRECT)] = "pgsteal_direct", > - [I(PGSTEAL_KHUGEPAGED)] = "pgsteal_khugepaged", > - [I(PGSTEAL_PROACTIVE)] = "pgsteal_proactive", > - [I(PGSCAN_KSWAPD)] = "pgscan_kswapd", > - [I(PGSCAN_DIRECT)] = "pgscan_direct", > - [I(PGSCAN_KHUGEPAGED)] = "pgscan_khugepaged", > - [I(PGSCAN_PROACTIVE)] = "pgscan_proactive", > [I(PGSCAN_DIRECT_THROTTLE)] = "pgscan_direct_throttle", > - [I(PGSCAN_ANON)] = "pgscan_anon", > - [I(PGSCAN_FILE)] = "pgscan_file", > - [I(PGSTEAL_ANON)] = "pgsteal_anon", > - [I(PGSTEAL_FILE)] = "pgsteal_file", > > #ifdef CONFIG_NUMA > [I(PGSCAN_ZONE_RECLAIM_SUCCESS)] = "zone_reclaim_success", > -- > 2.47.3 > -- Michal Hocko SUSE Labs

