Add a sample CXL type-3 driver that registers device memory as
private-node NUMA memory reachable only via explicit mempolicy
(set_mempolicy / mbind).

Probe flow:
  1. Call cxl_pci_type3_probe_init() for standard CXL device setup
  2. Look for pre-committed RAM regions; if none exist, create one
     using cxl_get_hpa_freespace() + cxl_request_dpa() +
     cxl_create_region()
  3. Convert the region to sysram via devm_cxl_add_sysram() with
     private=true and MMOP_ONLINE_MOVABLE
  4. Register node_private_ops with NP_OPS_MIGRATION | NP_OPS_MEMPOLICY
     so the node is excluded from default allocations

The migrate_to callback uses alloc_migration_target() with
__GFP_THISNODE | __GFP_PRIVATE to keep pages on the target node.

Move struct migration_target_control from mm/internal.h to
include/linux/migrate.h so the driver can use alloc_migration_target()
without depending on mm-internal headers.

Usage:
   echo $PCI_DEV > /sys/bus/pci/drivers/cxl_pci/unbind
   echo $PCI_DEV > /sys/bus/pci/drivers/cxl_mempolicy/bind

Signed-off-by: Gregory Price <[email protected]>
---
 drivers/cxl/Kconfig                           |   2 +
 drivers/cxl/Makefile                          |   2 +
 drivers/cxl/type3_drivers/Kconfig             |   2 +
 drivers/cxl/type3_drivers/Makefile            |   2 +
 .../cxl/type3_drivers/cxl_mempolicy/Kconfig   |  16 +
 .../cxl/type3_drivers/cxl_mempolicy/Makefile  |   4 +
 .../type3_drivers/cxl_mempolicy/mempolicy.c   | 297 ++++++++++++++++++
 include/linux/migrate.h                       |   7 +-
 mm/internal.h                                 |   7 -
 9 files changed, 331 insertions(+), 8 deletions(-)
 create mode 100644 drivers/cxl/type3_drivers/Kconfig
 create mode 100644 drivers/cxl/type3_drivers/Makefile
 create mode 100644 drivers/cxl/type3_drivers/cxl_mempolicy/Kconfig
 create mode 100644 drivers/cxl/type3_drivers/cxl_mempolicy/Makefile
 create mode 100644 drivers/cxl/type3_drivers/cxl_mempolicy/mempolicy.c

diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig
index f99aa7274d12..1648cdeaa0c9 100644
--- a/drivers/cxl/Kconfig
+++ b/drivers/cxl/Kconfig
@@ -278,4 +278,6 @@ config CXL_ATL
        depends on CXL_REGION
        depends on ACPI_PRMT && AMD_NB
 
+source "drivers/cxl/type3_drivers/Kconfig"
+
 endif
diff --git a/drivers/cxl/Makefile b/drivers/cxl/Makefile
index 2caa90fa4bf2..94d2b2233bf8 100644
--- a/drivers/cxl/Makefile
+++ b/drivers/cxl/Makefile
@@ -19,3 +19,5 @@ cxl_acpi-y := acpi.o
 cxl_pmem-y := pmem.o security.o
 cxl_mem-y := mem.o
 cxl_pci-y := pci.o
+
+obj-y += type3_drivers/
diff --git a/drivers/cxl/type3_drivers/Kconfig b/drivers/cxl/type3_drivers/Kconfig
new file mode 100644
index 000000000000..369b21763856
--- /dev/null
+++ b/drivers/cxl/type3_drivers/Kconfig
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+source "drivers/cxl/type3_drivers/cxl_mempolicy/Kconfig"
diff --git a/drivers/cxl/type3_drivers/Makefile b/drivers/cxl/type3_drivers/Makefile
new file mode 100644
index 000000000000..2b82265ff118
--- /dev/null
+++ b/drivers/cxl/type3_drivers/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_CXL_MEMPOLICY) += cxl_mempolicy/
diff --git a/drivers/cxl/type3_drivers/cxl_mempolicy/Kconfig b/drivers/cxl/type3_drivers/cxl_mempolicy/Kconfig
new file mode 100644
index 000000000000..3c45da237b9f
--- /dev/null
+++ b/drivers/cxl/type3_drivers/cxl_mempolicy/Kconfig
@@ -0,0 +1,16 @@
+config CXL_MEMPOLICY
+       tristate "CXL Private Memory with Mempolicy Support"
+       depends on CXL_PCI
+       depends on CXL_REGION
+       depends on NUMA
+       depends on MIGRATION
+       help
+         Minimal driver for CXL memory devices that registers memory as
+         N_MEMORY_PRIVATE with mempolicy support.  The memory is isolated
+         from default allocations and can only be reached via explicit
+         mempolicy (set_mempolicy or mbind).
+
+         No compression, no PTE controls, the memory behaves like normal
+         DRAM but is excluded from fallback allocations.
+
+         If unsure say 'n'.
diff --git a/drivers/cxl/type3_drivers/cxl_mempolicy/Makefile b/drivers/cxl/type3_drivers/cxl_mempolicy/Makefile
new file mode 100644
index 000000000000..dfb58fc88ad9
--- /dev/null
+++ b/drivers/cxl/type3_drivers/cxl_mempolicy/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_CXL_MEMPOLICY) += cxl_mempolicy.o
+cxl_mempolicy-y := mempolicy.o
+ccflags-y += -I$(srctree)/drivers/cxl
diff --git a/drivers/cxl/type3_drivers/cxl_mempolicy/mempolicy.c b/drivers/cxl/type3_drivers/cxl_mempolicy/mempolicy.c
new file mode 100644
index 000000000000..1c19818eb268
--- /dev/null
+++ b/drivers/cxl/type3_drivers/cxl_mempolicy/mempolicy.c
@@ -0,0 +1,297 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2026 Meta Platforms, Inc. All rights reserved. */
+/*
+ * CXL Mempolicy Driver
+ *
+ * Minimal driver for CXL memory devices that registers memory as
+ * N_MEMORY_PRIVATE with mempolicy support but no PTE controls.  The
+ * memory behaves like normal DRAM but is isolated from default allocations,
+ * it can only be reached via explicit mempolicy (set_mempolicy/mbind).
+ *
+ * Usage:
+ *   1. Unbind device from cxl_pci:
+ *        echo $PCI_DEV > /sys/bus/pci/drivers/cxl_pci/unbind
+ *   2. Bind to cxl_mempolicy:
+ *        echo $PCI_DEV > /sys/bus/pci/drivers/cxl_mempolicy/bind
+ */
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/xarray.h>
+#include <linux/node_private.h>
+#include <linux/migrate.h>
+#include <cxl/mailbox.h>
+#include "cxlmem.h"
+#include "cxl.h"
+
+struct cxl_mempolicy_ctx {
+       struct cxl_region *cxlr;
+       struct cxl_endpoint_decoder *cxled;
+       int nid;
+};
+
+static DEFINE_XARRAY(ctx_xa);
+
+static struct cxl_mempolicy_ctx *memdev_to_ctx(struct cxl_memdev *cxlmd)
+{
+       struct pci_dev *pdev = to_pci_dev(cxlmd->dev.parent);
+
+       return xa_load(&ctx_xa, (unsigned long)pdev);
+}
+
+static int cxl_mempolicy_migrate_to(struct list_head *folios, int nid,
+                                   enum migrate_mode mode,
+                                   enum migrate_reason reason,
+                                   unsigned int *nr_succeeded)
+{
+       struct migration_target_control mtc = {
+               .nid = nid,
+               .gfp_mask = GFP_HIGHUSER_MOVABLE | __GFP_THISNODE |
+                           __GFP_PRIVATE,
+               .reason = reason,
+       };
+
+       return migrate_pages(folios, alloc_migration_target, NULL,
+                            (unsigned long)&mtc, mode, reason, nr_succeeded);
+}
+
+static void cxl_mempolicy_folio_migrate(struct folio *src, struct folio *dst)
+{
+}
+
+static const struct node_private_ops cxl_mempolicy_ops = {
+       .migrate_to     = cxl_mempolicy_migrate_to,
+       .folio_migrate  = cxl_mempolicy_folio_migrate,
+       .flags = NP_OPS_MIGRATION | NP_OPS_MEMPOLICY,
+};
+
+static struct cxl_region *create_ram_region(struct cxl_memdev *cxlmd)
+{
+       struct cxl_mempolicy_ctx *ctx = memdev_to_ctx(cxlmd);
+       struct cxl_root_decoder *cxlrd;
+       struct cxl_endpoint_decoder *cxled;
+       struct cxl_region *cxlr;
+       resource_size_t ram_size, avail;
+
+       ram_size = cxl_ram_size(cxlmd->cxlds);
+       if (ram_size == 0) {
+               dev_info(&cxlmd->dev, "no RAM capacity available\n");
+               return ERR_PTR(-ENODEV);
+       }
+
+       ram_size = ALIGN_DOWN(ram_size, SZ_256M);
+       if (ram_size == 0) {
+               dev_info(&cxlmd->dev,
+                        "RAM capacity too small (< 256M)\n");
+               return ERR_PTR(-ENOSPC);
+       }
+
+       dev_info(&cxlmd->dev, "creating RAM region for %lld MB\n",
+                ram_size >> 20);
+
+       cxlrd = cxl_get_hpa_freespace(cxlmd, ram_size, &avail);
+       if (IS_ERR(cxlrd)) {
+               dev_err(&cxlmd->dev, "no HPA freespace: %ld\n",
+                       PTR_ERR(cxlrd));
+               return ERR_CAST(cxlrd);
+       }
+
+       cxled = cxl_request_dpa(cxlmd, CXL_PARTMODE_RAM, ram_size);
+       if (IS_ERR(cxled)) {
+               dev_err(&cxlmd->dev, "failed to request DPA: %ld\n",
+                       PTR_ERR(cxled));
+               cxl_put_root_decoder(cxlrd);
+               return ERR_CAST(cxled);
+       }
+
+       cxlr = cxl_create_region(cxlrd, &cxled, 1);
+       cxl_put_root_decoder(cxlrd);
+       if (IS_ERR(cxlr)) {
+               dev_err(&cxlmd->dev, "failed to create region: %ld\n",
+                       PTR_ERR(cxlr));
+               cxl_dpa_free(cxled);
+               return cxlr;
+       }
+
+       ctx->cxled = cxled;
+       dev_info(&cxlmd->dev, "created region %s\n",
+                dev_name(cxl_region_dev(cxlr)));
+       return cxlr;
+}
+
+static int setup_private_node(struct cxl_memdev *cxlmd,
+                             struct cxl_region *cxlr)
+{
+       struct cxl_mempolicy_ctx *ctx = memdev_to_ctx(cxlmd);
+       struct range hpa_range;
+       int rc;
+
+       device_release_driver(cxl_region_dev(cxlr));
+
+       rc = devm_cxl_add_sysram(cxlr, true, MMOP_ONLINE_MOVABLE);
+       if (rc) {
+               dev_err(cxl_region_dev(cxlr),
+                       "failed to add sysram: %d\n", rc);
+               if (device_attach(cxl_region_dev(cxlr)) < 0)
+                       dev_warn(cxl_region_dev(cxlr),
+                                "failed to re-attach driver\n");
+               return rc;
+       }
+
+       rc = cxl_get_region_range(cxlr, &hpa_range);
+       if (rc) {
+               dev_err(cxl_region_dev(cxlr),
+                       "failed to get region range: %d\n", rc);
+               return rc;
+       }
+
+       ctx->nid = phys_to_target_node(hpa_range.start);
+       if (ctx->nid == NUMA_NO_NODE)
+               ctx->nid = memory_add_physaddr_to_nid(hpa_range.start);
+
+       rc = node_private_set_ops(ctx->nid, &cxl_mempolicy_ops);
+       if (rc) {
+               dev_err(cxl_region_dev(cxlr),
+                       "failed to set ops on node %d: %d\n", ctx->nid, rc);
+               ctx->nid = NUMA_NO_NODE;
+               return rc;
+       }
+
+       dev_info(&cxlmd->dev,
+                "node %d registered as private mempolicy memory\n", ctx->nid);
+       return 0;
+}
+
+static int cxl_mempolicy_attach_probe(struct cxl_memdev *cxlmd)
+{
+       struct cxl_region *regions[8];
+       struct cxl_region *cxlr;
+       int nr, i;
+       int rc;
+
+       dev_info(&cxlmd->dev,
+                "cxl_mempolicy attach: looking for regions\n");
+
+       /* Phase 1: look for pre-committed RAM regions */
+       nr = cxl_get_committed_regions(cxlmd, regions, ARRAY_SIZE(regions));
+       for (i = 0; i < nr; i++) {
+               if (cxl_region_mode(regions[i]) != CXL_PARTMODE_RAM) {
+                       put_device(cxl_region_dev(regions[i]));
+                       continue;
+               }
+
+               cxlr = regions[i];
+               rc = setup_private_node(cxlmd, cxlr);
+               put_device(cxl_region_dev(cxlr));
+               if (rc == 0) {
+                       /* Release remaining region references */
+                       for (i++; i < nr; i++)
+                               put_device(cxl_region_dev(regions[i]));
+                       return 0;
+               }
+       }
+
+       /* Phase 2: no committed regions, create one */
+       dev_info(&cxlmd->dev,
+                "no existing regions, creating RAM region\n");
+
+       cxlr = create_ram_region(cxlmd);
+       if (IS_ERR(cxlr)) {
+               rc = PTR_ERR(cxlr);
+               if (rc == -ENODEV) {
+                       dev_info(&cxlmd->dev,
+                                "no RAM capacity: %d\n", rc);
+                       return 0;
+               }
+               return rc;
+       }
+
+       rc = setup_private_node(cxlmd, cxlr);
+       if (rc) {
+               dev_err(&cxlmd->dev,
+                       "failed to setup private node: %d\n", rc);
+               return rc;
+       }
+
+       /* Only take ownership of regions we created (Phase 2) */
+       memdev_to_ctx(cxlmd)->cxlr = cxlr;
+
+       return 0;
+}
+
+static const struct cxl_memdev_attach cxl_mempolicy_attach = {
+       .probe = cxl_mempolicy_attach_probe,
+};
+
+static int cxl_mempolicy_probe(struct pci_dev *pdev,
+                              const struct pci_device_id *id)
+{
+       struct cxl_mempolicy_ctx *ctx;
+       struct cxl_memdev *cxlmd;
+       int rc;
+
+       dev_info(&pdev->dev, "cxl_mempolicy: probing device\n");
+
+       ctx = devm_kzalloc(&pdev->dev, sizeof(*ctx), GFP_KERNEL);
+       if (!ctx)
+               return -ENOMEM;
+       ctx->nid = NUMA_NO_NODE;
+
+       rc = xa_insert(&ctx_xa, (unsigned long)pdev, ctx, GFP_KERNEL);
+       if (rc)
+               return rc;
+
+       cxlmd = cxl_pci_type3_probe_init(pdev, &cxl_mempolicy_attach);
+       if (IS_ERR(cxlmd)) {
+               xa_erase(&ctx_xa, (unsigned long)pdev);
+               return PTR_ERR(cxlmd);
+       }
+
+       dev_info(&pdev->dev, "cxl_mempolicy: probe complete\n");
+       return 0;
+}
+
+static void cxl_mempolicy_remove(struct pci_dev *pdev)
+{
+       struct cxl_mempolicy_ctx *ctx = xa_erase(&ctx_xa, (unsigned long)pdev);
+
+       dev_info(&pdev->dev, "cxl_mempolicy: removing device\n");
+
+       if (!ctx)
+               return;
+
+       if (ctx->nid != NUMA_NO_NODE)
+               WARN_ON(node_private_clear_ops(ctx->nid, &cxl_mempolicy_ops));
+
+       if (ctx->cxlr) {
+               cxl_destroy_region(ctx->cxlr);
+               ctx->cxlr = NULL;
+       }
+
+       if (ctx->cxled) {
+               cxl_dpa_free(ctx->cxled);
+               ctx->cxled = NULL;
+       }
+}
+
+static const struct pci_device_id cxl_mempolicy_pci_tbl[] = {
+       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x0d93) },
+       { },
+};
+MODULE_DEVICE_TABLE(pci, cxl_mempolicy_pci_tbl);
+
+static struct pci_driver cxl_mempolicy_driver = {
+       .name           = KBUILD_MODNAME,
+       .id_table       = cxl_mempolicy_pci_tbl,
+       .probe          = cxl_mempolicy_probe,
+       .remove         = cxl_mempolicy_remove,
+       .driver = {
+               .probe_type     = PROBE_PREFER_ASYNCHRONOUS,
+       },
+};
+
+module_pci_driver(cxl_mempolicy_driver);
+
+MODULE_DESCRIPTION("CXL: Private Memory with Mempolicy Support");
+MODULE_LICENSE("GPL v2");
+MODULE_IMPORT_NS("CXL");
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 7b2da3875ff2..1f9fb61f3932 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -10,7 +10,12 @@
 typedef struct folio *new_folio_t(struct folio *folio, unsigned long private);
 typedef void free_folio_t(struct folio *folio, unsigned long private);
 
-struct migration_target_control;
+struct migration_target_control {
+       int nid;                /* preferred node id */
+       nodemask_t *nmask;
+       gfp_t gfp_mask;
+       enum migrate_reason reason;
+};
 
 /**
  * struct movable_operations - Driver page migration
diff --git a/mm/internal.h b/mm/internal.h
index 64467ca774f1..85cd11189854 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1352,13 +1352,6 @@ extern const struct trace_print_flags gfpflag_names[];
 
 void setup_zone_pageset(struct zone *zone);
 
-struct migration_target_control {
-       int nid;                /* preferred node id */
-       nodemask_t *nmask;
-       gfp_t gfp_mask;
-       enum migrate_reason reason;
-};
-
 /*
  * mm/filemap.c
  */
-- 
2.53.0


Reply via email to