Not all private nodes may wish to engage in NUMA balancing faults. Add the NP_OPS_NUMA_BALANCING flag (BIT(5)) as an opt-in method.
Introduce folio_managed_allows_numa() helper:
  - ZONE_DEVICE folios always return false (never NUMA-scanned)
  - NP_OPS_NUMA_BALANCING filters for private nodes

In do_numa_page(), if a private-node folio with NP_OPS_PROTECT_WRITE
is still on its node after a failed/skipped migration, enforce
write-protection so the next write triggers handle_fault.

Signed-off-by: Gregory Price <[email protected]>
---
 drivers/base/node.c          |  4 ++++
 include/linux/node_private.h | 16 ++++++++++++++++
 mm/memory.c                  | 11 +++++++++++
 mm/mempolicy.c               |  5 ++++-
 4 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/drivers/base/node.c b/drivers/base/node.c
index a4955b9b5b93..88aaac45e814 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -961,6 +961,10 @@ int node_private_set_ops(int nid, const struct node_private_ops *ops)
 	    (ops->flags & NP_OPS_PROTECT_WRITE))
 		return -EINVAL;
 
+	if ((ops->flags & NP_OPS_NUMA_BALANCING) &&
+	    !(ops->flags & NP_OPS_MIGRATION))
+		return -EINVAL;
+
 	mutex_lock(&node_private_lock);
 	np = rcu_dereference_protected(NODE_DATA(nid)->node_private,
 				       lockdep_is_held(&node_private_lock));
diff --git a/include/linux/node_private.h b/include/linux/node_private.h
index 34d862f09e24..5ac60db1f044 100644
--- a/include/linux/node_private.h
+++ b/include/linux/node_private.h
@@ -140,6 +140,8 @@ struct node_private_ops {
 #define NP_OPS_PROTECT_WRITE	BIT(3)
 /* Kernel reclaim (kswapd, direct reclaim, OOM) operates on this node */
 #define NP_OPS_RECLAIM		BIT(4)
+/* Allow NUMA balancing to scan and migrate folios on this node */
+#define NP_OPS_NUMA_BALANCING	BIT(5)
 /* Private node is OOM-eligible: reclaim can run and pages can be demoted here */
 #define NP_OPS_OOM_ELIGIBLE	(NP_OPS_RECLAIM | NP_OPS_DEMOTION)
 
@@ -263,6 +265,15 @@ static inline void folio_managed_split_cb(struct folio *original_folio,
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
+static inline bool folio_managed_allows_numa(struct folio *folio)
+{
+	if (!folio_is_private_managed(folio))
+		return true;
+	if (folio_is_zone_device(folio))
+		return false;
+	return folio_private_flags(folio, NP_OPS_NUMA_BALANCING);
+}
+
 static inline int folio_managed_allows_user_migrate(struct folio *folio)
 {
 	if (folio_is_zone_device(folio))
@@ -443,6 +454,11 @@ int node_private_clear_ops(int nid, const struct node_private_ops *ops);
 
 #else /* !CONFIG_NUMA || !CONFIG_MEMORY_HOTPLUG */
 
+static inline bool folio_managed_allows_numa(struct folio *folio)
+{
+	return !folio_is_zone_device(folio);
+}
+
 static inline int folio_managed_allows_user_migrate(struct folio *folio)
 {
 	return -ENOENT;
diff --git a/mm/memory.c b/mm/memory.c
index 0f78988befef..88a581baae40 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -78,6 +78,7 @@
 #include <linux/sched/sysctl.h>
 #include <linux/pgalloc.h>
 #include <linux/uaccess.h>
+#include <linux/node_private.h>
 
 #include <trace/events/kmem.h>
 
@@ -6041,6 +6042,12 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
 	if (!folio || folio_is_zone_device(folio))
 		goto out_map;
 
+	/*
+	 * We do not need to check private-node folios here because the private
+	 * memory service either never opted in to NUMA balancing, or it did
+	 * and we need to restore private PTE controls on the failure path.
+	 */
+
 	nid = folio_nid(folio);
 	nr_pages = folio_nr_pages(folio);
 
@@ -6078,6 +6085,10 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
 	/*
 	 * Make it present again, depending on how arch implements
 	 * non-accessible ptes, some can allow access by kernel mode.
+	 *
+	 * If the folio is still on a private node with NP_OPS_PROTECT_WRITE,
+	 * enforce write-protection so the next write triggers handle_fault.
+	 * This covers migration-failed and migration-skipped paths.
 	 */
 	if (unlikely(folio && folio_managed_wrprotect(folio))) {
 		writable = false;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 8ac014950e88..8a3a9916ab59 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -861,7 +861,10 @@ bool folio_can_map_prot_numa(struct folio *folio, struct vm_area_struct *vma,
 {
 	int nid;
 
-	if (!folio || folio_is_zone_device(folio) || folio_test_ksm(folio))
+	if (!folio || folio_test_ksm(folio))
+		return false;
+
+	if (unlikely(!folio_managed_allows_numa(folio)))
 		return false;
 
 	/* Also skip shared copy-on-write folios */
-- 
2.53.0
