Private node services may want to support user-driven migration
(migrate_pages syscall, mbind) to allow data movement between regular
and private nodes.

ZONE_DEVICE always rejects user migration, but private nodes should
be able to opt in.

Add an NP_OPS_MIGRATION flag, a folio_managed_allows_migrate() check that
replaces the open-coded ZONE_DEVICE tests in the migration paths, and a
migrate_folios_to_node() wrapper that dispatches migration requests to a
private node's migrate_to callback.  Private nodes opt in by setting the
flag and providing migrate_to and folio_migrate callbacks for
driver-managed migration; nodes that leave the flag clear continue to
reject migration.

In migrate_to_node() and do_move_pages_to_node(), route requests through
migrate_folios_to_node() so a destination node that supports
NP_OPS_MIGRATION is handled by its service, enabling the migrate_pages
syscall to target private nodes.

Signed-off-by: Gregory Price <[email protected]>
---
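Not part of the patch, for reviewers: a minimal sketch of how a service
might opt in under this API.  All my_private_* names are hypothetical;
node_private_set_ops() is from earlier in this series, and deferring to
migrate_pages()/__folio_alloc() works here only because migrate_pages()
is newly exported below.  A real driver-managed path would substitute
its own allocator and whatever GFP mask its node requires.

/* Hypothetical example -- not part of this patch. */
#include <linux/gfp.h>
#include <linux/migrate.h>
#include <linux/node_private.h>

/* Assumed service-internal helper for rekeying PFN-based metadata. */
void my_private_metadata_move(struct folio *src, struct folio *dst);

static struct folio *my_private_alloc_dst(struct folio *src,
					  unsigned long private)
{
	/*
	 * Allocate the replacement folio on the target node.  A real
	 * service would use the GFP mask its node requires and handle
	 * hugetlb/THP placement; this sketch ignores both.
	 */
	return __folio_alloc(GFP_HIGHUSER_MOVABLE | __GFP_THISNODE,
			     folio_order(src), (int)private, NULL);
}

static int my_private_migrate_to(struct list_head *folios, int nid,
				 enum migrate_mode mode,
				 enum migrate_reason reason,
				 unsigned int *nr_succeeded)
{
	/*
	 * Defer to the generic machinery; migrate_pages() semantics
	 * (0 = success, >0 = nr failed, <0 = error) carry through.
	 */
	return migrate_pages(folios, my_private_alloc_dst, NULL,
			     (unsigned long)nid, mode, reason,
			     nr_succeeded);
}

static void my_private_folio_migrate(struct folio *src, struct folio *dst)
{
	/*
	 * Both folios are locked and faults are parked on migration
	 * entries, so PFN-keyed state moves without racing accesses
	 * through the page tables.
	 */
	my_private_metadata_move(src, dst);
}

static const struct node_private_ops my_private_ops = {
	/* .free_folio / .folio_split as the service requires */
	.migrate_to	= my_private_migrate_to,
	.folio_migrate	= my_private_folio_migrate,
	.flags		= NP_OPS_MIGRATION,
};

static int my_private_enable_migration(int nid)
{
	/* Rejected with -EINVAL unless both callbacks are set. */
	return node_private_set_ops(nid, &my_private_ops);
}
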
 drivers/base/node.c          |   4 ++
 include/linux/migrate.h      |  10 +++
 include/linux/node_private.h | 122 +++++++++++++++++++++++++++++++++++
 mm/damon/paddr.c             |   3 +
 mm/internal.h                |  24 +++++++
 mm/mempolicy.c               |  10 +--
 mm/migrate.c                 |  49 ++++++++++----
 mm/rmap.c                    |   4 +-
 8 files changed, 206 insertions(+), 20 deletions(-)
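
For context, the user-visible path this enables is the existing
migrate_pages(2) syscall; userspace needs nothing new beyond naming a
private node as the destination.  A sketch (node numbers hypothetical):

#include <numaif.h>
#include <stdio.h>

int main(void)
{
	unsigned long old_nodes = 1UL << 0;	/* source: node 0 */
	unsigned long new_nodes = 1UL << 2;	/* dest: private node 2 */

	/*
	 * pid 0 means the calling process.  In-kernel this now reaches
	 * migrate_folios_to_node(), which dispatches to the node's
	 * migrate_to callback when NP_OPS_MIGRATION is set.
	 */
	if (migrate_pages(0, 8 * sizeof(unsigned long),
			  &old_nodes, &new_nodes) < 0)
		perror("migrate_pages");
	return 0;
}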

diff --git a/drivers/base/node.c b/drivers/base/node.c
index 646dc48a23b5..e587f5781135 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -949,6 +949,10 @@ int node_private_set_ops(int nid, const struct node_private_ops *ops)
        if (!node_possible(nid))
                return -EINVAL;
 
+       if ((ops->flags & NP_OPS_MIGRATION) &&
+           (!ops->migrate_to || !ops->folio_migrate))
+               return -EINVAL;
+
        mutex_lock(&node_private_lock);
        np = rcu_dereference_protected(NODE_DATA(nid)->node_private,
                                       lockdep_is_held(&node_private_lock));
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 26ca00c325d9..7b2da3875ff2 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -71,6 +71,9 @@ void folio_migrate_flags(struct folio *newfolio, struct folio *folio);
 int folio_migrate_mapping(struct address_space *mapping,
                struct folio *newfolio, struct folio *folio, int extra_count);
 int set_movable_ops(const struct movable_operations *ops, enum pagetype type);
+int migrate_folios_to_node(struct list_head *folios, int nid,
+                                   enum migrate_mode mode,
+                                   enum migrate_reason reason);
 
 #else
 
@@ -96,6 +99,13 @@ static inline int set_movable_ops(const struct movable_operations *ops, enum pag
 {
        return -ENOSYS;
 }
+static inline int migrate_folios_to_node(struct list_head *folios,
+                                                 int nid,
+                                                 enum migrate_mode mode,
+                                                 enum migrate_reason reason)
+{
+       return -ENOSYS;
+}
 
 #endif /* CONFIG_MIGRATION */
 
diff --git a/include/linux/node_private.h b/include/linux/node_private.h
index f9dd2d25c8a5..0c5be1ee6e60 100644
--- a/include/linux/node_private.h
+++ b/include/linux/node_private.h
@@ -4,6 +4,7 @@
 
 #include <linux/completion.h>
 #include <linux/memremap.h>
+#include <linux/migrate_mode.h>
 #include <linux/mm.h>
 #include <linux/nodemask.h>
 #include <linux/rcupdate.h>
@@ -52,15 +53,40 @@ struct vm_fault;
  *     or NULL when called for the final (original) folio after all sub-folios
  *     have been split off.
  *
+ * @migrate_to: Migrate folios TO this node.
+ *     [refcounted callback]
+ *     Returns: 0 on full success, >0 = number of folios that failed to
+ *              migrate, <0 = error.  Matches migrate_pages() semantics.
+ *              @nr_succeeded is set to the number of successfully migrated
+ *              folios (may be NULL if caller doesn't need it).
+ *
+ * @folio_migrate: Post-migration notification that a folio on this private node
+ *     changed physical location (on the same node or a different node).
+ *     [folio-referenced callback]
+ *     Called from migrate_folio_move() after data has been copied but before
+ *     migration entries are replaced with real PTEs.  Both @src and @dst are
+ *     locked.  Faults block in migration_entry_wait() until
+ *     remove_migration_ptes() runs, so the service can safely update
+ *     PFN-based metadata (compression tables, device page tables, DMA
+ *     mappings, etc.) before any access through the page tables.
+ *
  * @flags: Operation exclusion flags (NP_OPS_* constants).
  *
  */
 struct node_private_ops {
        bool (*free_folio)(struct folio *folio);
        void (*folio_split)(struct folio *folio, struct folio *new_folio);
+       int (*migrate_to)(struct list_head *folios, int nid,
+                                 enum migrate_mode mode,
+                                 enum migrate_reason reason,
+                                 unsigned int *nr_succeeded);
+       void (*folio_migrate)(struct folio *src, struct folio *dst);
        unsigned long flags;
 };
 
+/* Allow user/kernel migration; requires migrate_to and folio_migrate */
+#define NP_OPS_MIGRATION               BIT(0)
+
 /**
  * struct node_private - Per-node container for N_MEMORY_PRIVATE nodes
  *
@@ -177,6 +203,81 @@ static inline void folio_managed_split_cb(struct folio *original_folio,
                node_private_split_cb(original_folio, new_folio);
 }
 
+#ifdef CONFIG_MEMORY_HOTPLUG
+static inline int folio_managed_allows_user_migrate(struct folio *folio)
+{
+       if (folio_is_zone_device(folio))
+               return -ENOENT;
+       return node_private_has_flag(folio_nid(folio), NP_OPS_MIGRATION) ?
+              folio_nid(folio) : -ENOENT;
+}
+
+/**
+ * folio_managed_allows_migrate - Check if a managed folio supports migration
+ * @folio: The folio to check
+ *
+ * Returns true if the folio can be migrated.  For zone_device folios, only
+ * device_private and device_coherent support migration.  For private node
+ * folios, migration requires NP_OPS_MIGRATION.  Normal folios always
+ * return true.
+ */
+static inline bool folio_managed_allows_migrate(struct folio *folio)
+{
+       if (folio_is_zone_device(folio))
+               return folio_is_device_private(folio) ||
+                      folio_is_device_coherent(folio);
+       if (folio_is_private_node(folio))
+               return folio_private_flags(folio, NP_OPS_MIGRATION);
+       return true;
+}
+
+/**
+ * node_private_migrate_to - Attempt service-specific migration to a private node
+ * @folios: list of folios to migrate; the call may sleep
+ * @nid: target node
+ * @mode: migration mode (MIGRATE_ASYNC, MIGRATE_SYNC, etc.)
+ * @reason: migration reason (MR_DEMOTION, MR_SYSCALL, etc.)
+ * @nr_succeeded: optional output for number of successfully migrated folios
+ *
+ * If @nid is an N_MEMORY_PRIVATE node with a migrate_to callback,
+ * invokes the callback and returns the result with migrate_pages()
+ * semantics (0 = full success, >0 = failure count, <0 = error).
+ * Returns -ENODEV if the node is not private or the service is being
+ * torn down.
+ *
+ * The source folios are on other nodes, so they do not pin the target
+ * node's node_private.  A temporary refcount is taken under rcu_read_lock
+ * to keep node_private (and the service module) alive across the callback.
+ */
+static inline int node_private_migrate_to(struct list_head *folios, int nid,
+                                         enum migrate_mode mode,
+                                         enum migrate_reason reason,
+                                         unsigned int *nr_succeeded)
+{
+       int (*fn)(struct list_head *, int, enum migrate_mode,
+                 enum migrate_reason, unsigned int *);
+       struct node_private *np;
+       int ret;
+
+       rcu_read_lock();
+       np = rcu_dereference(NODE_DATA(nid)->node_private);
+       if (!np || !np->ops || !np->ops->migrate_to ||
+           !refcount_inc_not_zero(&np->refcount)) {
+               rcu_read_unlock();
+               return -ENODEV;
+       }
+       fn = np->ops->migrate_to;
+       rcu_read_unlock();
+
+       ret = fn(folios, nid, mode, reason, nr_succeeded);
+
+       if (refcount_dec_and_test(&np->refcount))
+               complete(&np->released);
+
+       return ret;
+}
+#endif /* CONFIG_MEMORY_HOTPLUG */
+
 #else /* !CONFIG_NUMA */
 
 static inline bool folio_is_private_node(struct folio *folio)
@@ -242,6 +343,27 @@ int node_private_clear_ops(int nid, const struct node_private_ops *ops);
 
 #else /* !CONFIG_NUMA || !CONFIG_MEMORY_HOTPLUG */
 
+static inline int folio_managed_allows_user_migrate(struct folio *folio)
+{
+       return -ENOENT;
+}
+
+static inline bool folio_managed_allows_migrate(struct folio *folio)
+{
+       if (folio_is_zone_device(folio))
+               return folio_is_device_private(folio) ||
+                      folio_is_device_coherent(folio);
+       return true;
+}
+
+static inline int node_private_migrate_to(struct list_head *folios, int nid,
+                                         enum migrate_mode mode,
+                                         enum migrate_reason reason,
+                                         unsigned int *nr_succeeded)
+{
+       return -ENODEV;
+}
+
 static inline int node_private_register(int nid, struct node_private *np)
 {
        return -ENODEV;
diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c
index 07a8aead439e..532b8e2c62b0 100644
--- a/mm/damon/paddr.c
+++ b/mm/damon/paddr.c
@@ -277,6 +277,9 @@ static unsigned long damon_pa_migrate(struct damon_region *r,
                else
                        *sz_filter_passed += folio_size(folio) / addr_unit;
 
+               if (!folio_managed_allows_migrate(folio))
+                       goto put_folio;
+
                if (!folio_isolate_lru(folio))
                        goto put_folio;
                list_add(&folio->lru, &folio_list);
diff --git a/mm/internal.h b/mm/internal.h
index 658da41cdb8e..6ab4679fe943 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1442,6 +1442,30 @@ static inline bool folio_managed_on_free(struct folio *folio)
        return false;
 }
 
+/**
+ * folio_managed_migrate_notify - Notify service that a folio changed location
+ * @src: the old folio (about to be freed)
+ * @dst: the new folio (data already copied, migration entries still in place)
+ *
+ * Called from migrate_folio_move() after data has been copied but before
+ * remove_migration_ptes() installs real PTEs pointing to @dst.  While
+ * migration entries are in place, faults block in migration_entry_wait(),
+ * so the service can safely update PFN-based metadata before any access
+ * through the page tables.  Both @src and @dst are locked.
+ */
+static inline void folio_managed_migrate_notify(struct folio *src,
+                                               struct folio *dst)
+{
+       const struct node_private_ops *ops;
+
+       if (!folio_is_private_node(src))
+               return;
+
+       ops = folio_node_private_ops(src);
+       if (ops && ops->folio_migrate)
+               ops->folio_migrate(src, dst);
+}
+
 struct vm_struct *__get_vm_area_node(unsigned long size,
                                     unsigned long align, unsigned long shift,
                                     unsigned long vm_flags, unsigned long start,
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 68a98ba57882..2b0f9762d171 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -111,6 +111,7 @@
 #include <linux/mmu_notifier.h>
 #include <linux/printk.h>
 #include <linux/leafops.h>
+#include <linux/node_private.h>
 #include <linux/gcd.h>
 
 #include <asm/tlbflush.h>
@@ -1282,11 +1283,6 @@ static long migrate_to_node(struct mm_struct *mm, int source, int dest,
        LIST_HEAD(pagelist);
        long nr_failed;
        long err = 0;
-       struct migration_target_control mtc = {
-               .nid = dest,
-               .gfp_mask = GFP_HIGHUSER_MOVABLE | __GFP_THISNODE,
-               .reason = MR_SYSCALL,
-       };
 
        nodes_clear(nmask);
        node_set(source, nmask);
@@ -1311,8 +1307,8 @@ static long migrate_to_node(struct mm_struct *mm, int source, int dest,
        mmap_read_unlock(mm);
 
        if (!list_empty(&pagelist)) {
-               err = migrate_pages(&pagelist, alloc_migration_target, NULL,
-                       (unsigned long)&mtc, MIGRATE_SYNC, MR_SYSCALL, NULL);
+               err = migrate_folios_to_node(&pagelist, dest, MIGRATE_SYNC,
+                                            MR_SYSCALL);
                if (err)
                        putback_movable_pages(&pagelist);
        }
diff --git a/mm/migrate.c b/mm/migrate.c
index 5169f9717f60..a54d4af04df3 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -43,6 +43,7 @@
 #include <linux/sched/sysctl.h>
 #include <linux/memory-tiers.h>
 #include <linux/pagewalk.h>
+#include <linux/node_private.h>
 
 #include <asm/tlbflush.h>
 
@@ -1387,6 +1388,8 @@ static int migrate_folio_move(free_folio_t put_new_folio, unsigned long private,
        if (old_page_state & PAGE_WAS_MLOCKED)
                lru_add_drain();
 
+       folio_managed_migrate_notify(src, dst);
+
        if (old_page_state & PAGE_WAS_MAPPED)
                remove_migration_ptes(src, dst, 0);
 
@@ -2165,6 +2168,7 @@ int migrate_pages(struct list_head *from, new_folio_t get_new_folio,
 
        return rc_gather;
 }
+EXPORT_SYMBOL_GPL(migrate_pages);
 
 struct folio *alloc_migration_target(struct folio *src, unsigned long private)
 {
@@ -2204,6 +2208,31 @@ struct folio *alloc_migration_target(struct folio *src, unsigned long private)
 
        return __folio_alloc(gfp_mask, order, nid, mtc->nmask);
 }
+EXPORT_SYMBOL_GPL(alloc_migration_target);
+
+static int __migrate_folios_to_node(struct list_head *folios, int nid,
+                                   enum migrate_mode mode,
+                                   enum migrate_reason reason)
+{
+       struct migration_target_control mtc = {
+               .nid = nid,
+               .gfp_mask = GFP_HIGHUSER_MOVABLE | __GFP_THISNODE,
+               .reason = reason,
+       };
+
+       return migrate_pages(folios, alloc_migration_target, NULL,
+                            (unsigned long)&mtc, mode, reason, NULL);
+}
+
+int migrate_folios_to_node(struct list_head *folios, int nid,
+                          enum migrate_mode mode,
+                          enum migrate_reason reason)
+{
+       if (node_state(nid, N_MEMORY_PRIVATE))
+               return node_private_migrate_to(folios, nid, mode,
+                                              reason, NULL);
+       return __migrate_folios_to_node(folios, nid, mode, reason);
+}
 
 #ifdef CONFIG_NUMA
 
@@ -2221,14 +2250,8 @@ static int store_status(int __user *status, int start, int value, int nr)
 static int do_move_pages_to_node(struct list_head *pagelist, int node)
 {
        int err;
-       struct migration_target_control mtc = {
-               .nid = node,
-               .gfp_mask = GFP_HIGHUSER_MOVABLE | __GFP_THISNODE,
-               .reason = MR_SYSCALL,
-       };
 
-       err = migrate_pages(pagelist, alloc_migration_target, NULL,
-               (unsigned long)&mtc, MIGRATE_SYNC, MR_SYSCALL, NULL);
+       err = migrate_folios_to_node(pagelist, node, MIGRATE_SYNC, MR_SYSCALL);
        if (err)
                putback_movable_pages(pagelist);
        return err;
@@ -2240,7 +2263,7 @@ static int __add_folio_for_migration(struct folio *folio, int node,
        if (is_zero_folio(folio) || is_huge_zero_folio(folio))
                return -EFAULT;
 
-       if (folio_is_zone_device(folio))
+       if (!folio_managed_allows_migrate(folio))
                return -ENOENT;
 
        if (folio_nid(folio) == node)
@@ -2364,7 +2387,8 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
                err = -ENODEV;
                if (node < 0 || node >= MAX_NUMNODES)
                        goto out_flush;
-               if (!node_state(node, N_MEMORY))
+               if (!node_state(node, N_MEMORY) &&
+                   !node_state(node, N_MEMORY_PRIVATE))
                        goto out_flush;
 
                err = -EACCES;
@@ -2449,8 +2473,8 @@ static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages,
                if (folio) {
                        if (is_zero_folio(folio) || is_huge_zero_folio(folio))
                                err = -EFAULT;
-                       else if (folio_is_zone_device(folio))
-                               err = -ENOENT;
+                       else if (unlikely(folio_is_private_managed(folio)))
+                               err = folio_managed_allows_user_migrate(folio);
                        else
                                err = folio_nid(folio);
                        folio_walk_end(&fw, vma);
@@ -2660,6 +2684,9 @@ int migrate_misplaced_folio_prepare(struct folio *folio,
        int nr_pages = folio_nr_pages(folio);
        pg_data_t *pgdat = NODE_DATA(node);
 
+       if (!folio_managed_allows_migrate(folio))
+               return -ENOENT;
+
        if (folio_is_file_lru(folio)) {
                /*
                 * Do not migrate file folios that are mapped in multiple
diff --git a/mm/rmap.c b/mm/rmap.c
index f955f02d570e..805f9ceb82f3 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -72,6 +72,7 @@
 #include <linux/backing-dev.h>
 #include <linux/page_idle.h>
 #include <linux/memremap.h>
+#include <linux/node_private.h>
 #include <linux/userfaultfd_k.h>
 #include <linux/mm_inline.h>
 #include <linux/oom.h>
@@ -2616,8 +2617,7 @@ void try_to_migrate(struct folio *folio, enum ttu_flags flags)
                                        TTU_SYNC | TTU_BATCH_FLUSH)))
                return;
 
-       if (folio_is_zone_device(folio) &&
-           (!folio_is_device_private(folio) && !folio_is_device_coherent(folio)))
+       if (!folio_managed_allows_migrate(folio))
                return;
 
        /*
-- 
2.53.0

