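Skip anonymous VMAs registered for userfaultfd minor faults in task_numa_work() to avoid a protnone conflict: both NUMA balancing and userfaultfd use protnone PTEs on anonymous memory, so let uffd own the hinting there. Shmem is unaffected, because UFFDIO_DEACTIVATE zaps the PTEs entirely and NUMA scanning can proceed normally. NUMA stats for the skipped VMAs are fed from the uffd fault path instead. Add a NUMAB_SKIP_UFFD_MINOR trace reason so the skip is visible in the sched_skip_vma_numa tracepoint.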
Signed-off-by: Kiryl Shutsemau (Meta) <[email protected]> Assisted-by: Claude:claude-opus-4-6 --- include/linux/sched/numa_balancing.h | 1 + include/trace/events/sched.h | 3 ++- kernel/sched/fair.c | 13 +++++++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/include/linux/sched/numa_balancing.h b/include/linux/sched/numa_balancing.h index 52b22c5c396d..5668074a4271 100644 --- a/include/linux/sched/numa_balancing.h +++ b/include/linux/sched/numa_balancing.h @@ -23,6 +23,7 @@ enum numa_vmaskip_reason { NUMAB_SKIP_PID_INACTIVE, NUMAB_SKIP_IGNORE_PID, NUMAB_SKIP_SEQ_COMPLETED, + NUMAB_SKIP_UFFD_MINOR, }; #ifdef CONFIG_NUMA_BALANCING diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 7b2645b50e78..02e79b56db28 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -728,7 +728,8 @@ DEFINE_EVENT(sched_numa_pair_template, sched_swap_numa, EM( NUMAB_SKIP_SCAN_DELAY, "scan_delay" ) \ EM( NUMAB_SKIP_PID_INACTIVE, "pid_inactive" ) \ EM( NUMAB_SKIP_IGNORE_PID, "ignore_pid_inactive" ) \ - EMe(NUMAB_SKIP_SEQ_COMPLETED, "seq_completed" ) + EM( NUMAB_SKIP_SEQ_COMPLETED, "seq_completed" ) \ + EMe(NUMAB_SKIP_UFFD_MINOR, "uffd_minor" ) /* Redefine for export. */ #undef EM diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index ab4114712be7..57beb04562cf 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -25,6 +25,7 @@ #include <linux/hugetlb_inline.h> #include <linux/jiffies.h> #include <linux/mm_api.h> +#include <linux/userfaultfd_k.h> #include <linux/highmem.h> #include <linux/spinlock_api.h> #include <linux/cpumask_api.h> @@ -3459,6 +3460,18 @@ static void task_numa_work(struct callback_head *work) continue; } + /* + * Skip anonymous VMAs registered for userfaultfd minor faults. + * Both NUMA balancing and uffd use protnone PTEs on anonymous + * memory — let uffd own the hinting. 
For shmem, UFFDIO_DEACTIVATE + * zaps PTEs entirely (no protnone conflict), so NUMA scanning + * can proceed normally. + */ + if (vma_is_anonymous(vma) && userfaultfd_minor(vma)) { + trace_sched_skip_vma_numa(mm, vma, NUMAB_SKIP_UFFD_MINOR); + continue; + } + /* * Shared library pages mapped by multiple processes are not * migrated as it is expected they are cache replicated. Avoid -- 2.51.2

