Private node zones should not be compacted unless the service explicitly opts in, as compaction requires migration and services may have PFN-based metadata that needs updating.
Add a folio_migrate callback which fires from migrate_folio_move() for each relocated folio before faults are unblocked. Add zone_supports_compaction() which returns true for normal zones and checks NP_OPS_COMPACTION for N_MEMORY_PRIVATE zones. Filter three direct compaction zone loops: - compaction_zonelist_suitable() (reclaimer eligibility) - try_to_compact_pages() (direct compaction) - compact_node() (proactive/manual compaction) kcompactd paths are intentionally unfiltered -- the service is responsible for starting kcompactd on its node. NP_OPS_COMPACTION requires NP_OPS_MIGRATION. Signed-off-by: Gregory Price <[email protected]> --- drivers/base/node.c | 4 ++++ include/linux/node_private.h | 2 ++ mm/compaction.c | 26 ++++++++++++++++++++++++++ 3 files changed, 32 insertions(+) diff --git a/drivers/base/node.c b/drivers/base/node.c index 88aaac45e814..da523aca18fa 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -965,6 +965,10 @@ int node_private_set_ops(int nid, const struct node_private_ops *ops) !(ops->flags & NP_OPS_MIGRATION)) return -EINVAL; + if ((ops->flags & NP_OPS_COMPACTION) && + !(ops->flags & NP_OPS_MIGRATION)) + return -EINVAL; + mutex_lock(&node_private_lock); np = rcu_dereference_protected(NODE_DATA(nid)->node_private, lockdep_is_held(&node_private_lock)); diff --git a/include/linux/node_private.h b/include/linux/node_private.h index 5ac60db1f044..fe0336773ddb 100644 --- a/include/linux/node_private.h +++ b/include/linux/node_private.h @@ -142,6 +142,8 @@ struct node_private_ops { #define NP_OPS_RECLAIM BIT(4) /* Allow NUMA balancing to scan and migrate folios on this node */ #define NP_OPS_NUMA_BALANCING BIT(5) +/* Allow compaction to run on the node. Service must start kcompactd. */ +#define NP_OPS_COMPACTION BIT(6) /* Private node is OOM-eligible: reclaim can run and pages can be demoted here */ #define NP_OPS_OOM_ELIGIBLE (NP_OPS_RECLAIM | NP_OPS_DEMOTION) diff --git a/mm/compaction.c b/mm/compaction.c index 6a65145b03d8..d8532b957ec6 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -24,9 +24,26 @@ #include <linux/page_owner.h> #include <linux/psi.h> #include <linux/cpuset.h> +#include <linux/node_private.h> #include "internal.h" #ifdef CONFIG_COMPACTION + +/* + * Private node zones require NP_OPS_COMPACTION to opt in. Normal zones + * always support compaction. + */ +static inline bool zone_supports_compaction(struct zone *zone) +{ +#ifdef CONFIG_NUMA + if (!node_state(zone_to_nid(zone), N_MEMORY_PRIVATE)) + return true; + return zone_private_flags(zone, NP_OPS_COMPACTION); +#else + return true; +#endif +} + /* * Fragmentation score check interval for proactive compaction purposes. */ @@ -2443,6 +2460,9 @@ bool compaction_zonelist_suitable(struct alloc_context *ac, int order, ac->highest_zoneidx, ac->nodemask) { unsigned long available; + if (!zone_supports_compaction(zone)) + continue; + /* * Do not consider all the reclaimable memory because we do not * want to trash just for a single high order allocation which @@ -2832,6 +2852,9 @@ enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order, if (!numa_zone_alloc_allowed(alloc_flags, zone, gfp_mask)) continue; + if (!zone_supports_compaction(zone)) + continue; + if (prio > MIN_COMPACT_PRIORITY && compaction_deferred(zone, order)) { rc = max_t(enum compact_result, COMPACT_DEFERRED, rc); @@ -2906,6 +2929,9 @@ static int compact_node(pg_data_t *pgdat, bool proactive) if (!populated_zone(zone)) continue; + if (!zone_supports_compaction(zone)) + continue; + if (fatal_signal_pending(current)) return -EINTR; -- 2.53.0
