Add the CXL sysram region for direct memory hotplug of CXL RAM regions.

This region eliminates the intermediate dax_region/dax device layer by
directly performing memory hotplug operations.

Key features:
- Supports memory tier integration for proper NUMA placement
- Uses the CXL_SYSRAM_ONLINE_TYPE_* Kconfig options for default online type
- Automatically hotplugs memory on probe if online type is configured
- Will be extended to support private memory nodes in the future

The driver registers a sysram_regionN device as a child of the CXL
region, managing the memory hotplug lifecycle through device add/remove.

Signed-off-by: Gregory Price <[email protected]>
---
 drivers/cxl/core/Makefile        |   1 +
 drivers/cxl/core/core.h          |   4 +
 drivers/cxl/core/port.c          |   2 +
 drivers/cxl/core/region_sysram.c | 351 +++++++++++++++++++++++++++++++
 drivers/cxl/cxl.h                |  48 +++++
 5 files changed, 406 insertions(+)
 create mode 100644 drivers/cxl/core/region_sysram.c

diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile
index d3ec8aea64c5..d7ce52c50810 100644
--- a/drivers/cxl/core/Makefile
+++ b/drivers/cxl/core/Makefile
@@ -18,6 +18,7 @@ cxl_core-$(CONFIG_TRACING) += trace.o
 cxl_core-$(CONFIG_CXL_REGION) += region.o
 cxl_core-$(CONFIG_CXL_REGION) += region_dax.o
 cxl_core-$(CONFIG_CXL_REGION) += region_pmem.o
+cxl_core-$(CONFIG_CXL_REGION) += region_sysram.o
 cxl_core-$(CONFIG_CXL_MCE) += mce.o
 cxl_core-$(CONFIG_CXL_FEATURES) += features.o
 cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += edac.o
diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index 6e1f695fd155..973bbcae43f7 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -35,6 +35,7 @@ extern struct device_attribute dev_attr_delete_region;
 extern struct device_attribute dev_attr_region;
 extern const struct device_type cxl_pmem_region_type;
 extern const struct device_type cxl_dax_region_type;
+extern const struct device_type cxl_sysram_type;
 extern const struct device_type cxl_region_type;
 
 int cxl_decoder_detach(struct cxl_region *cxlr,
@@ -46,6 +47,7 @@ int cxl_decoder_detach(struct cxl_region *cxlr,
 #define SET_CXL_REGION_ATTR(x) (&dev_attr_##x.attr),
 #define CXL_PMEM_REGION_TYPE(x) (&cxl_pmem_region_type)
 #define CXL_DAX_REGION_TYPE(x) (&cxl_dax_region_type)
+#define CXL_SYSRAM_TYPE(x) (&cxl_sysram_type)
 int cxl_region_init(void);
 void cxl_region_exit(void);
 int cxl_get_poison_by_endpoint(struct cxl_port *port);
@@ -54,6 +56,7 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
                   u64 dpa);
 int devm_cxl_add_dax_region(struct cxl_region *cxlr, enum dax_driver_type);
 int devm_cxl_add_pmem_region(struct cxl_region *cxlr);
+int devm_cxl_add_sysram(struct cxl_region *cxlr, enum mmop online_type);
 
 #else
 static inline u64 cxl_dpa_to_hpa(struct cxl_region *cxlr,
@@ -88,6 +91,7 @@ static inline void cxl_region_exit(void)
 #define SET_CXL_REGION_ATTR(x)
 #define CXL_PMEM_REGION_TYPE(x) NULL
 #define CXL_DAX_REGION_TYPE(x) NULL
+#define CXL_SYSRAM_TYPE(x) NULL
 #endif
 
 struct cxl_send_command;
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index 5c82e6f32572..d6e82b3c2b64 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -66,6 +66,8 @@ static int cxl_device_id(const struct device *dev)
                return CXL_DEVICE_PMEM_REGION;
        if (dev->type == CXL_DAX_REGION_TYPE())
                return CXL_DEVICE_DAX_REGION;
+       if (dev->type == CXL_SYSRAM_TYPE())
+               return CXL_DEVICE_SYSRAM;
        if (is_cxl_port(dev)) {
                if (is_cxl_root(to_cxl_port(dev)))
                        return CXL_DEVICE_ROOT;
diff --git a/drivers/cxl/core/region_sysram.c b/drivers/cxl/core/region_sysram.c
new file mode 100644
index 000000000000..47a415deb352
--- /dev/null
+++ b/drivers/cxl/core/region_sysram.c
@@ -0,0 +1,351 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2026 Meta Platforms, Inc. All rights reserved. */
+/*
+ * CXL Sysram Region - Direct memory hotplug for CXL RAM regions
+ *
+ * This interface directly performs memory hotplug for CXL RAM regions,
+ * eliminating the indirection through DAX.
+ */
+
+#include <linux/memory_hotplug.h>
+#include <linux/memory-tiers.h>
+#include <linux/memory.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <cxlmem.h>
+#include <cxl.h>
+#include "core.h"
+
+static const char *sysram_res_name = "System RAM (CXL)";
+
+/**
+ * cxl_region_find_sysram - Find the sysram device associated with a region
+ * @cxlr: The CXL region
+ *
+ * Looks up the child of @cxlr named "sysram_region<id>" and converts it to
+ * its containing cxl_sysram.
+ *
+ * On success the returned object carries a device reference which the
+ * caller must release with put_device() when done with the pointer. If the
+ * child exists but is not a cxl_sysram device, the lookup reference is
+ * dropped here because the caller receives no handle to release it with.
+ *
+ * Return: Pointer to cxl_sysram, or NULL if not found
+ */
+struct cxl_sysram *cxl_region_find_sysram(struct cxl_region *cxlr)
+{
+       struct cxl_sysram *sysram;
+       struct device *sdev;
+       char sname[32];
+
+       snprintf(sname, sizeof(sname), "sysram_region%d", cxlr->id);
+       sdev = device_find_child_by_name(&cxlr->dev, sname);
+       if (!sdev)
+               return NULL;
+
+       sysram = to_cxl_sysram(sdev);
+       /* Type mismatch: do not strand the reference taken by the lookup */
+       if (!sysram)
+               put_device(sdev);
+
+       return sysram;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_region_find_sysram, "CXL");
+
+/*
+ * Pick the NUMA node for the start of the region's resource: prefer the
+ * target node reported by phys_to_target_node() and fall back to
+ * memory_add_physaddr_to_nid() when that yields NUMA_NO_NODE.
+ */
+static int sysram_get_numa_node(struct cxl_region *cxlr)
+{
+       const struct resource *res = cxlr->params.res;
+       int node = phys_to_target_node(res->start);
+
+       if (node != NUMA_NO_NODE)
+               return node;
+
+       return memory_add_physaddr_to_nid(res->start);
+}
+
+/*
+ * Reserve the sysram HPA range and add it as driver-managed memory with the
+ * requested online type.
+ *
+ * Returns 0 on success, -EBUSY when the range is already hotplugged or the
+ * reservation fails, or the error from __add_memory_driver_managed().
+ */
+static int sysram_hotplug_add(struct cxl_sysram *sysram, enum mmop online_type)
+{
+       const u64 start = sysram->hpa_range.start;
+       const u64 size = range_len(&sysram->hpa_range);
+       struct resource *region;
+       int err;
+
+       /* A recorded resource means the range has already been added */
+       if (sysram->res)
+               return -EBUSY;
+
+       region = request_mem_region(start, size, sysram->res_name);
+       if (!region)
+               return -EBUSY;
+
+       sysram->res = region;
+
+       /*
+        * Mark the reservation as System RAM while keeping ..._BUSY clear,
+        * which lets add_memory() place a child resource beneath it.
+        */
+       region->flags = IORESOURCE_SYSTEM_RAM;
+
+       /*
+        * Ensure that future kexec'd kernels will not treat
+        * this as RAM automatically.
+        */
+       err = __add_memory_driver_managed(sysram->mgid, start, size,
+                                         sysram_res_name, MHP_NID_IS_MGID,
+                                         online_type);
+       if (!err)
+               return 0;
+
+       /* Undo the reservation so a later attempt starts from scratch */
+       remove_resource(region);
+       kfree(region);
+       sysram->res = NULL;
+
+       return err;
+}
+
+/*
+ * Offline and remove the sysram range, then release its resource.
+ *
+ * Returns 0 when nothing was hotplugged or removal succeeded, otherwise the
+ * error from offline_and_remove_memory(), in which case the resource is
+ * left in place.
+ */
+static int sysram_hotplug_remove(struct cxl_sysram *sysram)
+{
+       struct resource *region;
+       int err;
+
+       if (!sysram->res)
+               return 0;
+
+       err = offline_and_remove_memory(sysram->hpa_range.start,
+                                       range_len(&sysram->hpa_range));
+       if (err)
+               return err;
+
+       /* Re-read the pointer after the memory has gone away */
+       region = sysram->res;
+       if (!region)
+               return 0;
+
+       remove_resource(region);
+       kfree(region);
+       sysram->res = NULL;
+
+       return 0;
+}
+
+/*
+ * Exported entry point that forwards to sysram_hotplug_remove(): returns 0
+ * when the memory was removed (or was never hotplugged) and a negative
+ * errno when offlining/removal failed.
+ */
+int cxl_sysram_offline_and_remove(struct cxl_sysram *sysram)
+{
+       return sysram_hotplug_remove(sysram);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_sysram_offline_and_remove, "CXL");
+
+/*
+ * Device release callback: tear down whatever devm_cxl_add_sysram() and the
+ * hotplug paths set up, then free the cxl_sysram itself.
+ */
+static void cxl_sysram_release(struct device *dev)
+{
+       struct cxl_sysram *sysram = to_cxl_sysram(dev);
+
+       if (sysram->res && sysram_hotplug_remove(sysram)) {
+               /*
+                * The memory could not be offlined, so the resource is still
+                * linked into the resource tree and its name still points at
+                * sysram->res_name. Freeing the name, or tearing down the
+                * memory group and memory type underneath memory that is
+                * still present, would leave dangling state behind.
+                * Purposely leak everything instead.
+                */
+               dev_warn(dev, "memory removal failed, leaking resources\n");
+               return;
+       }
+
+       kfree(sysram->res_name);
+
+       /* mgid stays negative until a memory group has been registered */
+       if (sysram->mgid >= 0)
+               memory_group_unregister(sysram->mgid);
+
+       if (sysram->mtype)
+               clear_node_memory_type(sysram->numa_node, sysram->mtype);
+
+       kfree(sysram);
+}
+
+/*
+ * hotplug - write-only attribute taking a memory online type string
+ *
+ * The input is parsed by mhp_online_type_from_str(). MMOP_OFFLINE offlines
+ * and removes the range; every other accepted value adds the range with
+ * that online type. On success the request is recorded in
+ * sysram->last_hotplug_cmd, matching what the probe-time hotplug does.
+ *
+ * NOTE(review): nothing here serialises concurrent writers against each
+ * other or against the probe-time hotplug - confirm whether callers rely on
+ * an outer lock.
+ *
+ * Returns @len on success or a negative errno.
+ */
+static ssize_t hotplug_store(struct device *dev,
+                            struct device_attribute *attr,
+                            const char *buf, size_t len)
+{
+       struct cxl_sysram *sysram = to_cxl_sysram(dev);
+       int online_type, rc;
+
+       online_type = mhp_online_type_from_str(buf);
+       if (online_type < 0)
+               return online_type;
+
+       if (online_type == MMOP_OFFLINE)
+               rc = sysram_hotplug_remove(sysram);
+       else
+               rc = sysram_hotplug_add(sysram, online_type);
+
+       if (rc) {
+               dev_warn(dev, "hotplug %s failed: %d\n",
+                        online_type == MMOP_OFFLINE ? "offline" : "online",
+                        rc);
+               return rc;
+       }
+
+       sysram->last_hotplug_cmd = online_type;
+
+       return len;
+}
+static DEVICE_ATTR_WO(hotplug);
+
+/* sysfs attributes specific to a sysram device: only "hotplug" so far */
+static struct attribute *cxl_sysram_attrs[] = {
+       &dev_attr_hotplug.attr,
+       NULL
+};
+
+static const struct attribute_group cxl_sysram_attribute_group = {
+       .attrs = cxl_sysram_attrs,
+};
+
+/* Groups attached through cxl_sysram_type: the shared base group plus ours */
+static const struct attribute_group *cxl_sysram_attribute_groups[] = {
+       &cxl_base_attribute_group,
+       &cxl_sysram_attribute_group,
+       NULL
+};
+
+/*
+ * Device type of every sysram device; is_cxl_sysram() identifies sysram
+ * devices by comparing dev->type against this object.
+ */
+const struct device_type cxl_sysram_type = {
+       .name = "cxl_sysram",
+       .release = cxl_sysram_release,
+       .groups = cxl_sysram_attribute_groups,
+};
+
+/* True when @dev was set up with cxl_sysram_type as its device type */
+static bool is_cxl_sysram(struct device *dev)
+{
+       return dev->type == &cxl_sysram_type;
+}
+
+/*
+ * Convert a struct device into the cxl_sysram that embeds it. Warns once
+ * and returns NULL when @dev is not a sysram device.
+ */
+struct cxl_sysram *to_cxl_sysram(struct device *dev)
+{
+       bool is_sysram = is_cxl_sysram(dev);
+
+       if (!dev_WARN_ONCE(dev, !is_sysram, "not a cxl_sysram device\n"))
+               return container_of(dev, struct cxl_sysram, dev);
+
+       return NULL;
+}
+EXPORT_SYMBOL_NS_GPL(to_cxl_sysram, "CXL");
+
+/* Return the struct device embedded in @sysram; no reference is taken */
+struct device *cxl_sysram_dev(struct cxl_sysram *sysram)
+{
+       return &sysram->dev;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_sysram_dev, "CXL");
+
+static struct lock_class_key cxl_sysram_key;
+
+/*
+ * Translate the CXL_SYSRAM_ONLINE_TYPE_* Kconfig choice into an MMOP_*
+ * value. With none of the options enabled the memory stays offline.
+ */
+static enum mmop cxl_sysram_get_default_online_type(void)
+{
+       enum mmop type = MMOP_OFFLINE;
+
+       if (IS_ENABLED(CONFIG_CXL_SYSRAM_ONLINE_TYPE_SYSTEM_DEFAULT))
+               type = mhp_get_default_online_type();
+       else if (IS_ENABLED(CONFIG_CXL_SYSRAM_ONLINE_TYPE_MOVABLE))
+               type = MMOP_ONLINE_MOVABLE;
+       else if (IS_ENABLED(CONFIG_CXL_SYSRAM_ONLINE_TYPE_NORMAL))
+               type = MMOP_ONLINE;
+
+       return type;
+}
+
+/*
+ * Allocate a cxl_sysram for @cxlr and initialise its embedded device as a
+ * child of the region on the CXL bus. The device is initialised but not
+ * added.
+ *
+ * Returns the new object or ERR_PTR(-ENOMEM).
+ */
+static struct cxl_sysram *cxl_sysram_alloc(struct cxl_region *cxlr)
+{
+       struct cxl_sysram *sysram = kzalloc(sizeof(*sysram), GFP_KERNEL);
+
+       if (!sysram)
+               return ERR_PTR(-ENOMEM);
+
+       sysram->cxlr = cxlr;
+       sysram->online_type = cxl_sysram_get_default_online_type();
+       sysram->last_hotplug_cmd = MMOP_OFFLINE;
+       /* -1 marks the node and the memory group as not assigned yet */
+       sysram->numa_node = -1;
+       sysram->mgid = -1;
+
+       device_initialize(&sysram->dev);
+       lockdep_set_class(&sysram->dev.mutex, &cxl_sysram_key);
+       device_set_pm_not_required(&sysram->dev);
+       sysram->dev.parent = &cxlr->dev;
+       sysram->dev.bus = &cxl_bus_type;
+       sysram->dev.type = &cxl_sysram_type;
+
+       return sysram;
+}
+
+/*
+ * devm action installed by devm_cxl_add_sysram(): unregisters the sysram
+ * device passed as the opaque action argument.
+ */
+static void sysram_unregister(void *_sysram)
+{
+       struct cxl_sysram *sysram = _sysram;
+
+       device_unregister(&sysram->dev);
+}
+
+/**
+ * devm_cxl_add_sysram - Create and register the sysram device of a region
+ * @cxlr: The CXL region whose HPA range backs the memory
+ * @online_type: Online type to use instead of the Kconfig default, or a
+ *               negative value to keep the default
+ *
+ * Aligns the region's HPA range to memory block boundaries, sets up the
+ * memory type and memory group for the resolved NUMA node, and adds a
+ * "sysram_region<id>" device below the region. For CXL_PARTMODE_RAM regions
+ * with an online type above MMOP_OFFLINE the range is hotplugged right
+ * away; a failure of that step is only warned about and does not fail the
+ * registration. The device is unregistered automatically through a devm
+ * action on the region device.
+ *
+ * Return: 0 on success, negative errno otherwise
+ */
+int devm_cxl_add_sysram(struct cxl_region *cxlr, enum mmop online_type)
+{
+       struct cxl_sysram *sysram __free(put_cxl_sysram) = NULL;
+       struct memory_dev_type *mtype;
+       struct range hpa_range;
+       struct device *dev;
+       int adist = MEMTIER_DEFAULT_LOWTIER_ADISTANCE;
+       int numa_node;
+       int rc;
+
+       rc = cxl_region_get_hpa_range(cxlr, &hpa_range);
+       if (rc)
+               return rc;
+
+       hpa_range = memory_block_align_range(&hpa_range);
+       if (hpa_range.start >= hpa_range.end) {
+               dev_warn(&cxlr->dev, "region too small after alignment\n");
+               return -ENOSPC;
+       }
+
+       sysram = cxl_sysram_alloc(cxlr);
+       if (IS_ERR(sysram))
+               return PTR_ERR(sysram);
+
+       sysram->hpa_range = hpa_range;
+
+       sysram->res_name = kasprintf(GFP_KERNEL, "cxl_sysram%d", cxlr->id);
+       if (!sysram->res_name)
+               return -ENOMEM;
+
+       /*
+        * Override default online type if caller specified one. Compare as
+        * int: the compiler may give enum mmop an unsigned underlying type,
+        * which would make a plain ">= 0" test always true and silently
+        * discard the Kconfig default.
+        */
+       if ((int)online_type >= 0)
+               sysram->online_type = online_type;
+
+       dev = &sysram->dev;
+
+       rc = dev_set_name(dev, "sysram_region%d", cxlr->id);
+       if (rc)
+               return rc;
+
+       /* Setup memory tier before adding device */
+       numa_node = sysram_get_numa_node(cxlr);
+       if (numa_node < 0) {
+               dev_warn(&cxlr->dev, "rejecting region with invalid node: %d\n",
+                        numa_node);
+               return -EINVAL;
+       }
+       sysram->numa_node = numa_node;
+
+       mt_calc_adistance(numa_node, &adist);
+       mtype = mt_get_memory_type(adist);
+       if (IS_ERR(mtype))
+               return PTR_ERR(mtype);
+       sysram->mtype = mtype;
+
+       init_node_memory_type(numa_node, mtype);
+
+       /* Register memory group for this region */
+       rc = memory_group_register_static(numa_node,
+                                         PFN_UP(range_len(&hpa_range)));
+       if (rc < 0)
+               return rc;
+       sysram->mgid = rc;
+
+       rc = device_add(dev);
+       if (rc)
+               return rc;
+
+       dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent),
+               dev_name(dev));
+
+       /*
+        * Dynamic capacity regions (DCD) will have memory added later.
+        * For static RAM regions, hotplug the entire range now, provided
+        * the online type is a valid online mode.
+        */
+       if (cxlr->mode == CXL_PARTMODE_RAM &&
+           sysram->online_type > MMOP_OFFLINE) {
+               rc = sysram_hotplug_add(sysram, sysram->online_type);
+               if (rc)
+                       dev_warn(dev, "hotplug failed: %d\n", rc);
+               else
+                       sysram->last_hotplug_cmd = sysram->online_type;
+       }
+
+       /* From here on the devm action owns the device reference */
+       return devm_add_action_or_reset(&cxlr->dev, sysram_unregister,
+                                       no_free_ptr(sysram));
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_add_sysram, "CXL");
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index f899f240f229..8e8342fd4fde 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -607,6 +607,34 @@ struct cxl_dax_region {
        enum dax_driver_type dax_driver;
 };
 
+/**
+ * struct cxl_sysram - CXL SysRAM region for system memory hotplug
+ * @dev: device for this sysram, created as a child of the region device
+ * @cxlr: parent cxl_region
+ * @online_type: Default memory online type for new hotplug ops (MMOP_* value)
+ * @last_hotplug_cmd: Last hotplug command submitted (MMOP_* value);
+ *     starts out as MMOP_OFFLINE
+ * @hpa_range: Host physical address range for the region, aligned to
+ *     memory block boundaries
+ * @res_name: Resource name for the memory region ("cxl_sysram<id>")
+ * @res: Memory resource (set when hotplugged, NULL otherwise)
+ * @mgid: Memory group id, or -1 while no group is registered
+ * @mtype: Memory tier type, or NULL while none is assigned
+ * @numa_node: NUMA node for this memory, or -1 while unresolved
+ *
+ * Device that directly performs memory hotplug for CXL RAM regions.
+ */
+struct cxl_sysram {
+       struct device dev;
+       struct cxl_region *cxlr;
+       enum mmop online_type;
+       int last_hotplug_cmd;
+       struct range hpa_range;
+       const char *res_name;
+       struct resource *res;
+       int mgid;
+       struct memory_dev_type *mtype;
+       int numa_node;
+};
+
 /**
  * struct cxl_port - logical collection of upstream port devices and
  *                  downstream port devices to construct a CXL memory
@@ -807,6 +835,7 @@ DEFINE_FREE(put_cxl_port, struct cxl_port *, if (!IS_ERR_OR_NULL(_T)) put_device
 DEFINE_FREE(put_cxl_root_decoder, struct cxl_root_decoder *, if (!IS_ERR_OR_NULL(_T)) put_device(&_T->cxlsd.cxld.dev))
 DEFINE_FREE(put_cxl_region, struct cxl_region *, if (!IS_ERR_OR_NULL(_T)) put_device(&_T->dev))
 DEFINE_FREE(put_cxl_dax_region, struct cxl_dax_region *, if (!IS_ERR_OR_NULL(_T)) put_device(&_T->dev))
+DEFINE_FREE(put_cxl_sysram, struct cxl_sysram *, if (!IS_ERR_OR_NULL(_T)) put_device(&_T->dev))
 
 int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd);
 void cxl_bus_rescan(void);
@@ -889,6 +918,7 @@ void cxl_destroy_region(struct cxl_region *cxlr);
 struct device *cxl_region_dev(struct cxl_region *cxlr);
 enum cxl_partition_mode cxl_region_mode(struct cxl_region *cxlr);
 int cxl_get_region_range(struct cxl_region *cxlr, struct range *range);
+struct cxl_sysram *cxl_region_find_sysram(struct cxl_region *cxlr);
 int cxl_get_committed_regions(struct cxl_memdev *cxlmd,
                              struct cxl_region **regions, int max_regions);
 struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd,
@@ -936,6 +966,7 @@ void cxl_driver_unregister(struct cxl_driver *cxl_drv);
 #define CXL_DEVICE_PMEM_REGION         7
 #define CXL_DEVICE_DAX_REGION          8
 #define CXL_DEVICE_PMU                 9
+#define CXL_DEVICE_SYSRAM              10
 
 #define MODULE_ALIAS_CXL(type) MODULE_ALIAS("cxl:t" __stringify(type) "*")
 #define CXL_MODALIAS_FMT "cxl:t%d"
@@ -954,6 +985,10 @@ bool is_cxl_pmem_region(struct device *dev);
 struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev);
 int cxl_add_to_region(struct cxl_endpoint_decoder *cxled);
 struct cxl_dax_region *to_cxl_dax_region(struct device *dev);
+struct cxl_sysram *to_cxl_sysram(struct device *dev);
+struct device *cxl_sysram_dev(struct cxl_sysram *sysram);
+int devm_cxl_add_sysram(struct cxl_region *cxlr, enum mmop online_type);
+int cxl_sysram_offline_and_remove(struct cxl_sysram *sysram);
 u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint, u64 spa);
 #else
 static inline bool is_cxl_pmem_region(struct device *dev)
@@ -972,6 +1007,19 @@ static inline struct cxl_dax_region *to_cxl_dax_region(struct device *dev)
 {
        return NULL;
 }
+/* Stub for the #else configuration: no device is ever a cxl_sysram */
+static inline struct cxl_sysram *to_cxl_sysram(struct device *dev)
+{
+       return NULL;
+}
+/* Stub for the #else configuration: sysram devices cannot be created */
+static inline int devm_cxl_add_sysram(struct cxl_region *cxlr,
+                                     enum mmop online_type)
+{
+       return -ENXIO;
+}
+/* Stub for the #else configuration: there is never memory to remove */
+static inline int cxl_sysram_offline_and_remove(struct cxl_sysram *sysram)
+{
+       return -ENXIO;
+}
 static inline u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint,
                                               u64 spa)
 {
-- 
2.53.0


Reply via email to