On Tue 17-02-26 19:29:41, JP Kobryn (Meta) wrote:
> From: JP Kobryn <[email protected]>
> 
> There are situations where reclaim kicks in on a system with free memory.
> One possible cause is a NUMA imbalance scenario where one or more nodes are
> under pressure. It would help if we could easily identify such nodes.
> 
> Move the pgscan and pgsteal counters from vm_event_item to node_stat_item
> to provide per-node reclaim visibility. With these counters as node stats,
> the values are now displayed in the per-node section of /proc/zoneinfo,
> which allows for quick identification of the affected nodes.
> 
> /proc/vmstat continues to report the same counters, aggregated across all
> nodes. However, the position of these items within the output changes, as
> they move from the vm events section to the node stats section.
> 
> Memcg accounting of these counters is preserved. The relocated counters
> remain visible in memory.stat alongside the existing aggregate pgscan and
> pgsteal counters.
> 
> However, this change affects how the global counters are accumulated.
> Previously, the global event count update was gated on !cgroup_reclaim(),
> which excluded memcg-based reclaim from /proc/vmstat. Now that
> mod_lruvec_state() is used to update the counters, the global counters
> include all reclaim, memcg-based reclaim included. This is consistent with
> how the pgdemote counters are already tracked.
> 
> Finally, the virtio_balloon driver is updated to use
> global_node_page_state() to fetch the counters, as they are no longer
> accessible through the vm_events array.
> 
> Signed-off-by: JP Kobryn <[email protected]>
> Suggested-by: Johannes Weiner <[email protected]>

Acked-by: Michal Hocko <[email protected]>
Thanks!

> ---
>  drivers/virtio/virtio_balloon.c |  8 ++---
>  include/linux/mmzone.h          | 12 ++++++++
>  include/linux/vm_event_item.h   | 12 --------
>  mm/memcontrol.c                 | 52 +++++++++++++++++++++++----------
>  mm/vmscan.c                     | 32 ++++++++------------
>  mm/vmstat.c                     | 24 +++++++--------
>  6 files changed, 76 insertions(+), 64 deletions(-)
> 
> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> index 4e549abe59ff..ab945532ceef 100644
> --- a/drivers/virtio/virtio_balloon.c
> +++ b/drivers/virtio/virtio_balloon.c
> @@ -369,13 +369,13 @@ static inline unsigned int update_balloon_vm_stats(struct virtio_balloon *vb)
>       update_stat(vb, idx++, VIRTIO_BALLOON_S_ALLOC_STALL, stall);
>  
>       update_stat(vb, idx++, VIRTIO_BALLOON_S_ASYNC_SCAN,
> -                 pages_to_bytes(events[PGSCAN_KSWAPD]));
> +                 pages_to_bytes(global_node_page_state(PGSCAN_KSWAPD)));
>       update_stat(vb, idx++, VIRTIO_BALLOON_S_DIRECT_SCAN,
> -                 pages_to_bytes(events[PGSCAN_DIRECT]));
> +                 pages_to_bytes(global_node_page_state(PGSCAN_DIRECT)));
>       update_stat(vb, idx++, VIRTIO_BALLOON_S_ASYNC_RECLAIM,
> -                 pages_to_bytes(events[PGSTEAL_KSWAPD]));
> +                 pages_to_bytes(global_node_page_state(PGSTEAL_KSWAPD)));
>       update_stat(vb, idx++, VIRTIO_BALLOON_S_DIRECT_RECLAIM,
> -                 pages_to_bytes(events[PGSTEAL_DIRECT]));
> +                 pages_to_bytes(global_node_page_state(PGSTEAL_DIRECT)));
>  
>  #ifdef CONFIG_HUGETLB_PAGE
>       update_stat(vb, idx++, VIRTIO_BALLOON_S_HTLB_PGALLOC,
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index 3e51190a55e4..1aa9c7aec889 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -255,6 +255,18 @@ enum node_stat_item {
>       PGDEMOTE_DIRECT,
>       PGDEMOTE_KHUGEPAGED,
>       PGDEMOTE_PROACTIVE,
> +     PGSTEAL_KSWAPD,
> +     PGSTEAL_DIRECT,
> +     PGSTEAL_KHUGEPAGED,
> +     PGSTEAL_PROACTIVE,
> +     PGSTEAL_ANON,
> +     PGSTEAL_FILE,
> +     PGSCAN_KSWAPD,
> +     PGSCAN_DIRECT,
> +     PGSCAN_KHUGEPAGED,
> +     PGSCAN_PROACTIVE,
> +     PGSCAN_ANON,
> +     PGSCAN_FILE,
>  #ifdef CONFIG_HUGETLB_PAGE
>       NR_HUGETLB,
>  #endif
> diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
> index 22a139f82d75..1fa3b3ad0ff9 100644
> --- a/include/linux/vm_event_item.h
> +++ b/include/linux/vm_event_item.h
> @@ -40,19 +40,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
>               PGLAZYFREED,
>               PGREFILL,
>               PGREUSE,
> -             PGSTEAL_KSWAPD,
> -             PGSTEAL_DIRECT,
> -             PGSTEAL_KHUGEPAGED,
> -             PGSTEAL_PROACTIVE,
> -             PGSCAN_KSWAPD,
> -             PGSCAN_DIRECT,
> -             PGSCAN_KHUGEPAGED,
> -             PGSCAN_PROACTIVE,
>               PGSCAN_DIRECT_THROTTLE,
> -             PGSCAN_ANON,
> -             PGSCAN_FILE,
> -             PGSTEAL_ANON,
> -             PGSTEAL_FILE,
>  #ifdef CONFIG_NUMA
>               PGSCAN_ZONE_RECLAIM_SUCCESS,
>               PGSCAN_ZONE_RECLAIM_FAILED,
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 007413a53b45..e89e77457701 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -328,6 +328,18 @@ static const unsigned int memcg_node_stat_items[] = {
>       PGDEMOTE_DIRECT,
>       PGDEMOTE_KHUGEPAGED,
>       PGDEMOTE_PROACTIVE,
> +     PGSTEAL_KSWAPD,
> +     PGSTEAL_DIRECT,
> +     PGSTEAL_KHUGEPAGED,
> +     PGSTEAL_PROACTIVE,
> +     PGSTEAL_ANON,
> +     PGSTEAL_FILE,
> +     PGSCAN_KSWAPD,
> +     PGSCAN_DIRECT,
> +     PGSCAN_KHUGEPAGED,
> +     PGSCAN_PROACTIVE,
> +     PGSCAN_ANON,
> +     PGSCAN_FILE,
>  #ifdef CONFIG_HUGETLB_PAGE
>       NR_HUGETLB,
>  #endif
> @@ -441,14 +453,6 @@ static const unsigned int memcg_vm_event_stat[] = {
>  #endif
>       PSWPIN,
>       PSWPOUT,
> -     PGSCAN_KSWAPD,
> -     PGSCAN_DIRECT,
> -     PGSCAN_KHUGEPAGED,
> -     PGSCAN_PROACTIVE,
> -     PGSTEAL_KSWAPD,
> -     PGSTEAL_DIRECT,
> -     PGSTEAL_KHUGEPAGED,
> -     PGSTEAL_PROACTIVE,
>       PGFAULT,
>       PGMAJFAULT,
>       PGREFILL,
> @@ -1382,6 +1386,14 @@ static const struct memory_stat memory_stats[] = {
>       { "pgdemote_direct",            PGDEMOTE_DIRECT         },
>       { "pgdemote_khugepaged",        PGDEMOTE_KHUGEPAGED     },
>       { "pgdemote_proactive",         PGDEMOTE_PROACTIVE      },
> +     { "pgsteal_kswapd",             PGSTEAL_KSWAPD          },
> +     { "pgsteal_direct",             PGSTEAL_DIRECT          },
> +     { "pgsteal_khugepaged",         PGSTEAL_KHUGEPAGED      },
> +     { "pgsteal_proactive",          PGSTEAL_PROACTIVE       },
> +     { "pgscan_kswapd",              PGSCAN_KSWAPD           },
> +     { "pgscan_direct",              PGSCAN_DIRECT           },
> +     { "pgscan_khugepaged",          PGSCAN_KHUGEPAGED       },
> +     { "pgscan_proactive",           PGSCAN_PROACTIVE        },
>  #ifdef CONFIG_NUMA_BALANCING
>       { "pgpromote_success",          PGPROMOTE_SUCCESS       },
>  #endif
> @@ -1425,6 +1437,14 @@ static int memcg_page_state_output_unit(int item)
>       case PGDEMOTE_DIRECT:
>       case PGDEMOTE_KHUGEPAGED:
>       case PGDEMOTE_PROACTIVE:
> +     case PGSTEAL_KSWAPD:
> +     case PGSTEAL_DIRECT:
> +     case PGSTEAL_KHUGEPAGED:
> +     case PGSTEAL_PROACTIVE:
> +     case PGSCAN_KSWAPD:
> +     case PGSCAN_DIRECT:
> +     case PGSCAN_KHUGEPAGED:
> +     case PGSCAN_PROACTIVE:
>  #ifdef CONFIG_NUMA_BALANCING
>       case PGPROMOTE_SUCCESS:
>  #endif
> @@ -1496,15 +1516,15 @@ static void memcg_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
>  
>       /* Accumulated memory events */
>       seq_buf_printf(s, "pgscan %lu\n",
> -                    memcg_events(memcg, PGSCAN_KSWAPD) +
> -                    memcg_events(memcg, PGSCAN_DIRECT) +
> -                    memcg_events(memcg, PGSCAN_PROACTIVE) +
> -                    memcg_events(memcg, PGSCAN_KHUGEPAGED));
> +                    memcg_page_state(memcg, PGSCAN_KSWAPD) +
> +                    memcg_page_state(memcg, PGSCAN_DIRECT) +
> +                    memcg_page_state(memcg, PGSCAN_PROACTIVE) +
> +                    memcg_page_state(memcg, PGSCAN_KHUGEPAGED));
>       seq_buf_printf(s, "pgsteal %lu\n",
> -                    memcg_events(memcg, PGSTEAL_KSWAPD) +
> -                    memcg_events(memcg, PGSTEAL_DIRECT) +
> -                    memcg_events(memcg, PGSTEAL_PROACTIVE) +
> -                    memcg_events(memcg, PGSTEAL_KHUGEPAGED));
> +                    memcg_page_state(memcg, PGSTEAL_KSWAPD) +
> +                    memcg_page_state(memcg, PGSTEAL_DIRECT) +
> +                    memcg_page_state(memcg, PGSTEAL_PROACTIVE) +
> +                    memcg_page_state(memcg, PGSTEAL_KHUGEPAGED));
>  
>       for (i = 0; i < ARRAY_SIZE(memcg_vm_event_stat); i++) {
>  #ifdef CONFIG_MEMCG_V1
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 44e4fcd6463c..dd6d87340941 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -1984,7 +1984,7 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan,
>       unsigned long nr_taken;
>       struct reclaim_stat stat;
>       bool file = is_file_lru(lru);
> -     enum vm_event_item item;
> +     enum node_stat_item item;
>       struct pglist_data *pgdat = lruvec_pgdat(lruvec);
>       bool stalled = false;
>  
> @@ -2010,10 +2010,8 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan,
>  
>       __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, nr_taken);
>       item = PGSCAN_KSWAPD + reclaimer_offset(sc);
> -     if (!cgroup_reclaim(sc))
> -             __count_vm_events(item, nr_scanned);
> -     count_memcg_events(lruvec_memcg(lruvec), item, nr_scanned);
> -     __count_vm_events(PGSCAN_ANON + file, nr_scanned);
> +     mod_lruvec_state(lruvec, item, nr_scanned);
> +     mod_lruvec_state(lruvec, PGSCAN_ANON + file, nr_scanned);
>  
>       spin_unlock_irq(&lruvec->lru_lock);
>  
> @@ -2030,10 +2028,8 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan,
>                                       stat.nr_demoted);
>       __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken);
>       item = PGSTEAL_KSWAPD + reclaimer_offset(sc);
> -     if (!cgroup_reclaim(sc))
> -             __count_vm_events(item, nr_reclaimed);
> -     count_memcg_events(lruvec_memcg(lruvec), item, nr_reclaimed);
> -     __count_vm_events(PGSTEAL_ANON + file, nr_reclaimed);
> +     mod_lruvec_state(lruvec, item, nr_reclaimed);
> +     mod_lruvec_state(lruvec, PGSTEAL_ANON + file, nr_reclaimed);
>  
>       lru_note_cost_unlock_irq(lruvec, file, stat.nr_pageout,
>                                       nr_scanned - nr_reclaimed);
> @@ -4542,7 +4538,7 @@ static int scan_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
>  {
>       int i;
>       int gen;
> -     enum vm_event_item item;
> +     enum node_stat_item item;
>       int sorted = 0;
>       int scanned = 0;
>       int isolated = 0;
> @@ -4601,13 +4597,11 @@ static int scan_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
>       }
>  
>       item = PGSCAN_KSWAPD + reclaimer_offset(sc);
> -     if (!cgroup_reclaim(sc)) {
> -             __count_vm_events(item, isolated);
> +     if (!cgroup_reclaim(sc))
>               __count_vm_events(PGREFILL, sorted);
> -     }
> -     count_memcg_events(memcg, item, isolated);
> +     mod_lruvec_state(lruvec, item, isolated);
>       count_memcg_events(memcg, PGREFILL, sorted);
> -     __count_vm_events(PGSCAN_ANON + type, isolated);
> +     mod_lruvec_state(lruvec, PGSCAN_ANON + type, isolated);
>       trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, scan_batch,
>                               scanned, skipped, isolated,
>                               type ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON);
> @@ -4692,7 +4686,7 @@ static int evict_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
>       LIST_HEAD(clean);
>       struct folio *folio;
>       struct folio *next;
> -     enum vm_event_item item;
> +     enum node_stat_item item;
>       struct reclaim_stat stat;
>       struct lru_gen_mm_walk *walk;
>       bool skip_retry = false;
> @@ -4756,10 +4750,8 @@ static int evict_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
>                                       stat.nr_demoted);
>  
>       item = PGSTEAL_KSWAPD + reclaimer_offset(sc);
> -     if (!cgroup_reclaim(sc))
> -             __count_vm_events(item, reclaimed);
> -     count_memcg_events(memcg, item, reclaimed);
> -     __count_vm_events(PGSTEAL_ANON + type, reclaimed);
> +     mod_lruvec_state(lruvec, item, reclaimed);
> +     mod_lruvec_state(lruvec, PGSTEAL_ANON + type, reclaimed);
>  
>       spin_unlock_irq(&lruvec->lru_lock);
>  
> diff --git a/mm/vmstat.c b/mm/vmstat.c
> index 99270713e0c1..d952c1e763e6 100644
> --- a/mm/vmstat.c
> +++ b/mm/vmstat.c
> @@ -1276,6 +1276,18 @@ const char * const vmstat_text[] = {
>       [I(PGDEMOTE_DIRECT)]                    = "pgdemote_direct",
>       [I(PGDEMOTE_KHUGEPAGED)]                = "pgdemote_khugepaged",
>       [I(PGDEMOTE_PROACTIVE)]                 = "pgdemote_proactive",
> +     [I(PGSTEAL_KSWAPD)]                     = "pgsteal_kswapd",
> +     [I(PGSTEAL_DIRECT)]                     = "pgsteal_direct",
> +     [I(PGSTEAL_KHUGEPAGED)]                 = "pgsteal_khugepaged",
> +     [I(PGSTEAL_PROACTIVE)]                  = "pgsteal_proactive",
> +     [I(PGSTEAL_ANON)]                       = "pgsteal_anon",
> +     [I(PGSTEAL_FILE)]                       = "pgsteal_file",
> +     [I(PGSCAN_KSWAPD)]                      = "pgscan_kswapd",
> +     [I(PGSCAN_DIRECT)]                      = "pgscan_direct",
> +     [I(PGSCAN_KHUGEPAGED)]                  = "pgscan_khugepaged",
> +     [I(PGSCAN_PROACTIVE)]                   = "pgscan_proactive",
> +     [I(PGSCAN_ANON)]                        = "pgscan_anon",
> +     [I(PGSCAN_FILE)]                        = "pgscan_file",
>  #ifdef CONFIG_HUGETLB_PAGE
>       [I(NR_HUGETLB)]                         = "nr_hugetlb",
>  #endif
> @@ -1320,19 +1332,7 @@ const char * const vmstat_text[] = {
>  
>       [I(PGREFILL)]                           = "pgrefill",
>       [I(PGREUSE)]                            = "pgreuse",
> -     [I(PGSTEAL_KSWAPD)]                     = "pgsteal_kswapd",
> -     [I(PGSTEAL_DIRECT)]                     = "pgsteal_direct",
> -     [I(PGSTEAL_KHUGEPAGED)]                 = "pgsteal_khugepaged",
> -     [I(PGSTEAL_PROACTIVE)]                  = "pgsteal_proactive",
> -     [I(PGSCAN_KSWAPD)]                      = "pgscan_kswapd",
> -     [I(PGSCAN_DIRECT)]                      = "pgscan_direct",
> -     [I(PGSCAN_KHUGEPAGED)]                  = "pgscan_khugepaged",
> -     [I(PGSCAN_PROACTIVE)]                   = "pgscan_proactive",
>       [I(PGSCAN_DIRECT_THROTTLE)]             = "pgscan_direct_throttle",
> -     [I(PGSCAN_ANON)]                        = "pgscan_anon",
> -     [I(PGSCAN_FILE)]                        = "pgscan_file",
> -     [I(PGSTEAL_ANON)]                       = "pgsteal_anon",
> -     [I(PGSTEAL_FILE)]                       = "pgsteal_file",
>  
>  #ifdef CONFIG_NUMA
>       [I(PGSCAN_ZONE_RECLAIM_SUCCESS)]        = "zone_reclaim_success",
> -- 
> 2.47.3

-- 
Michal Hocko
SUSE Labs

Reply via email to