On 2/12/26 05:51, JP Kobryn wrote:
> It would be useful to see a breakdown of allocations to understand which
> NUMA policies are driving them. For example, when investigating memory
> pressure, having policy-specific counts could show that allocations were
> bound to the affected node (via MPOL_BIND).
>
> Add per-policy page allocation counters as new node stat items. These
> counters can provide correlation between a mempolicy and pressure on a
> given node.
>
> Signed-off-by: JP Kobryn <[email protected]>
> Suggested-by: Johannes Weiner <[email protected]>

Are the numa_{hit,miss,etc.} counters insufficient? Could they be
extended in a way that captures whatever important details are missing?
A counter per policy type seems exhaustive, but on one hand it might
not be important to distinguish between some of them, and on the other
hand it doesn't track the nodemask anyway.
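
For reference, the existing accounting classifies an allocation only by
where it landed relative to the preferred zone and the local node, not
by which policy drove it. Roughly (abridged from zone_statistics() in
mm/page_alloc.c, quoting from memory, so details may be slightly off):

	static inline void zone_statistics(struct zone *preferred_zone,
					   struct zone *z, long nr_account)
	{
	#ifdef CONFIG_NUMA
		enum numa_stat_item local_stat = NUMA_LOCAL;

		/* did we land on the node the task is running on? */
		if (zone_to_nid(z) != numa_node_id())
			local_stat = NUMA_OTHER;

		/* hit/miss is relative to the preferred zone only */
		if (zone_to_nid(z) == zone_to_nid(preferred_zone))
			__count_numa_events(z, NUMA_HIT, nr_account);
		else {
			__count_numa_events(z, NUMA_MISS, nr_account);
			__count_numa_events(preferred_zone, NUMA_FOREIGN, nr_account);
		}
		__count_numa_events(z, local_stat, nr_account);
	#endif
	}

So the policy identity is indeed invisible there; the question is
whether a whole node_stat_item per mode is the right way to make it
visible.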
> ---
>  include/linux/mmzone.h |  9 +++++++++
>  mm/mempolicy.c         | 30 ++++++++++++++++++++++++++++--
>  mm/vmstat.c            |  9 +++++++++
>  3 files changed, 46 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index fc5d6c88d2f0..762609d5f0af 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -255,6 +255,15 @@ enum node_stat_item {
>  	PGDEMOTE_DIRECT,
>  	PGDEMOTE_KHUGEPAGED,
>  	PGDEMOTE_PROACTIVE,
> +#ifdef CONFIG_NUMA
> +	PGALLOC_MPOL_DEFAULT,
> +	PGALLOC_MPOL_PREFERRED,
> +	PGALLOC_MPOL_BIND,
> +	PGALLOC_MPOL_INTERLEAVE,
> +	PGALLOC_MPOL_LOCAL,
> +	PGALLOC_MPOL_PREFERRED_MANY,
> +	PGALLOC_MPOL_WEIGHTED_INTERLEAVE,
> +#endif
>  #ifdef CONFIG_HUGETLB_PAGE
>  	NR_HUGETLB,
>  #endif
> diff --git a/mm/mempolicy.c b/mm/mempolicy.c
> index 68a98ba57882..3c64784af761 100644
> --- a/mm/mempolicy.c
> +++ b/mm/mempolicy.c
> @@ -217,6 +217,21 @@ static void reduce_interleave_weights(unsigned int *bw, u8 *new_iw)
>  		new_iw[nid] /= iw_gcd;
>  }
>  
> +#define CHECK_MPOL_NODE_STAT_OFFSET(mpol) \
> +	BUILD_BUG_ON(PGALLOC_##mpol - mpol != PGALLOC_MPOL_DEFAULT)
> +
> +static enum node_stat_item mpol_node_stat(unsigned short mode)
> +{
> +	CHECK_MPOL_NODE_STAT_OFFSET(MPOL_PREFERRED);
> +	CHECK_MPOL_NODE_STAT_OFFSET(MPOL_BIND);
> +	CHECK_MPOL_NODE_STAT_OFFSET(MPOL_INTERLEAVE);
> +	CHECK_MPOL_NODE_STAT_OFFSET(MPOL_LOCAL);
> +	CHECK_MPOL_NODE_STAT_OFFSET(MPOL_PREFERRED_MANY);
> +	CHECK_MPOL_NODE_STAT_OFFSET(MPOL_WEIGHTED_INTERLEAVE);
> +
> +	return PGALLOC_MPOL_DEFAULT + mode;
> +}
> +
>  int mempolicy_set_node_perf(unsigned int node, struct access_coordinate *coords)
>  {
>  	struct weighted_interleave_state *new_wi_state, *old_wi_state = NULL;
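
The offset trick is neat, though worth spelling out what it assumes:
MPOL_DEFAULT is 0 in the UAPI enum, and the PGALLOC_MPOL_* entries must
mirror the MPOL_* order exactly. That is what each BUILD_BUG_ON() pins
down, e.g. for MPOL_BIND:

	PGALLOC_MPOL_BIND - MPOL_BIND == PGALLOC_MPOL_DEFAULT
	<=> PGALLOC_MPOL_BIND == PGALLOC_MPOL_DEFAULT + MPOL_BIND

so reordering either enum breaks the build rather than silently
misattributing counts.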
> @@ -2446,8 +2461,14 @@ static struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order,
>  
>  	nodemask = policy_nodemask(gfp, pol, ilx, &nid);
>  
> -	if (pol->mode == MPOL_PREFERRED_MANY)
> -		return alloc_pages_preferred_many(gfp, order, nid, nodemask);
> +	if (pol->mode == MPOL_PREFERRED_MANY) {
> +		page = alloc_pages_preferred_many(gfp, order, nid, nodemask);
> +		if (page)
> +			__mod_node_page_state(page_pgdat(page),
> +				mpol_node_stat(MPOL_PREFERRED_MANY), 1 << order);
> +
> +		return page;
> +	}
>  
>  	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
>  	    /* filter "hugepage" allocation, unless from alloc_pages() */
> @@ -2472,6 +2493,9 @@ static struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order,
>  			page = __alloc_frozen_pages_noprof(
>  				gfp | __GFP_THISNODE | __GFP_NORETRY, order,
>  				nid, NULL);
> +			if (page)
> +				__mod_node_page_state(page_pgdat(page),
> +					mpol_node_stat(pol->mode), 1 << order);
>  			if (page || !(gfp & __GFP_DIRECT_RECLAIM))
>  				return page;
>  			/*
> @@ -2484,6 +2508,8 @@ static struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order,
>  	}
>  
>  	page = __alloc_frozen_pages_noprof(gfp, order, nid, nodemask);
> +	if (page)
> +		__mod_node_page_state(page_pgdat(page), mpol_node_stat(pol->mode), 1 << order);
>  
>  	if (unlikely(pol->mode == MPOL_INTERLEAVE ||
>  		     pol->mode == MPOL_WEIGHTED_INTERLEAVE) && page) {
> diff --git a/mm/vmstat.c b/mm/vmstat.c
> index 65de88cdf40e..74e0ddde1e93 100644
> --- a/mm/vmstat.c
> +++ b/mm/vmstat.c
> @@ -1291,6 +1291,15 @@ const char * const vmstat_text[] = {
>  	[I(PGDEMOTE_DIRECT)] = "pgdemote_direct",
>  	[I(PGDEMOTE_KHUGEPAGED)] = "pgdemote_khugepaged",
>  	[I(PGDEMOTE_PROACTIVE)] = "pgdemote_proactive",
> +#ifdef CONFIG_NUMA
> +	[I(PGALLOC_MPOL_DEFAULT)] = "pgalloc_mpol_default",
> +	[I(PGALLOC_MPOL_PREFERRED)] = "pgalloc_mpol_preferred",
> +	[I(PGALLOC_MPOL_BIND)] = "pgalloc_mpol_bind",
> +	[I(PGALLOC_MPOL_INTERLEAVE)] = "pgalloc_mpol_interleave",
> +	[I(PGALLOC_MPOL_LOCAL)] = "pgalloc_mpol_local",
> +	[I(PGALLOC_MPOL_PREFERRED_MANY)] = "pgalloc_mpol_preferred_many",
> +	[I(PGALLOC_MPOL_WEIGHTED_INTERLEAVE)] = "pgalloc_mpol_weighted_interleave",
> +#endif
>  #ifdef CONFIG_HUGETLB_PAGE
>  	[I(NR_HUGETLB)] = "nr_hugetlb",
>  #endif
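
For completeness, since these are node stat items, the new counters
would presumably show up in /proc/vmstat and in the per-node
/sys/devices/system/node/nodeN/vmstat files, something like (values
made up):

	$ grep pgalloc_mpol /sys/devices/system/node/node1/vmstat
	pgalloc_mpol_default 123456
	pgalloc_mpol_preferred 0
	pgalloc_mpol_bind 7890
	pgalloc_mpol_interleave 0
	pgalloc_mpol_local 0
	pgalloc_mpol_preferred_many 0
	pgalloc_mpol_weighted_interleave 0

That is also where the missing nodemask information would be felt: an
MPOL_BIND to node 1 and an MPOL_BIND to nodes 0-1 that both land on
node 1 would be indistinguishable here.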