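Skip NUMA balancing scans of anonymous VMAs that are registered for
userfaultfd minor faults. Both NUMA balancing and uffd use protnone PTEs
on anonymous memory, so scanning such VMAs would conflict with uffd.
Shmem VMAs are unaffected and continue to be scanned.
NUMA stats for the skipped VMAs are fed from the uffd fault path instead.
Add a NUMAB_SKIP_UFFD_MINOR trace reason for the new skip case.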

Signed-off-by: Kiryl Shutsemau (Meta) <[email protected]>
Assisted-by: Claude:claude-opus-4-6
---
 include/linux/sched/numa_balancing.h |  1 +
 include/trace/events/sched.h         |  3 ++-
 kernel/sched/fair.c                  | 13 +++++++++++++
 3 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/include/linux/sched/numa_balancing.h b/include/linux/sched/numa_balancing.h
index 52b22c5c396d..5668074a4271 100644
--- a/include/linux/sched/numa_balancing.h
+++ b/include/linux/sched/numa_balancing.h
@@ -23,6 +23,7 @@ enum numa_vmaskip_reason {
        NUMAB_SKIP_PID_INACTIVE,
        NUMAB_SKIP_IGNORE_PID,
        NUMAB_SKIP_SEQ_COMPLETED,
+       NUMAB_SKIP_UFFD_MINOR,
 };
 
 #ifdef CONFIG_NUMA_BALANCING
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 7b2645b50e78..02e79b56db28 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -728,7 +728,8 @@ DEFINE_EVENT(sched_numa_pair_template, sched_swap_numa,
        EM( NUMAB_SKIP_SCAN_DELAY,              "scan_delay" )  \
        EM( NUMAB_SKIP_PID_INACTIVE,            "pid_inactive" )        \
        EM( NUMAB_SKIP_IGNORE_PID,              "ignore_pid_inactive" )         \
-       EMe(NUMAB_SKIP_SEQ_COMPLETED,           "seq_completed" )
+       EM( NUMAB_SKIP_SEQ_COMPLETED,           "seq_completed" )       \
+       EMe(NUMAB_SKIP_UFFD_MINOR,              "uffd_minor" )
 
 /* Redefine for export. */
 #undef EM
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ab4114712be7..57beb04562cf 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -25,6 +25,7 @@
 #include <linux/hugetlb_inline.h>
 #include <linux/jiffies.h>
 #include <linux/mm_api.h>
+#include <linux/userfaultfd_k.h>
 #include <linux/highmem.h>
 #include <linux/spinlock_api.h>
 #include <linux/cpumask_api.h>
@@ -3459,6 +3460,18 @@ static void task_numa_work(struct callback_head *work)
                        continue;
                }
 
+               /*
+                * Skip anonymous VMAs registered for userfaultfd minor faults.
+                * Both NUMA balancing and uffd use protnone PTEs on anonymous
+                * memory — let uffd own the hinting. For shmem, UFFDIO_DEACTIVATE
+                * zaps PTEs entirely (no protnone conflict), so NUMA scanning
+                * can proceed normally.
+                */
+               if (vma_is_anonymous(vma) && userfaultfd_minor(vma)) {
+                       trace_sched_skip_vma_numa(mm, vma, NUMAB_SKIP_UFFD_MINOR);
+                       continue;
+               }
+
                /*
                 * Shared library pages mapped by multiple processes are not
                 * migrated as it is expected they are cache replicated. Avoid
-- 
2.51.2


Reply via email to