On Thu, Jan 22, 2026 at 04:55:42AM +0000, Smita Koralahalli wrote: > The current probe time ownership check for Soft Reserved memory based > solely on CXL window intersection is insufficient. dax_hmem probing is not > always guaranteed to run after CXL enumeration and region assembly, which > can lead to incorrect ownership decisions before the CXL stack has > finished publishing windows and assembling committed regions. > > Introduce deferred ownership handling for Soft Reserved ranges that > intersect CXL windows at probe time by scheduling deferred work from > dax_hmem and waiting for the CXL stack to complete enumeration and region > assembly before deciding ownership. > > Evaluate ownership of Soft Reserved ranges based on CXL region > containment. > > - If all Soft Reserved ranges are fully contained within committed CXL > regions, DROP handling Soft Reserved ranges from dax_hmem and allow > dax_cxl to bind. > > - If any Soft Reserved range is not fully claimed by committed CXL > region, tear down all CXL regions and REGISTER the Soft Reserved > ranges with dax_hmem instead.
Question about the teardown below.. > > While ownership resolution is pending, gate dax_cxl probing to avoid > binding prematurely. > > This enforces a strict ownership. Either CXL fully claims the Soft > Reserved ranges or it relinquishes it entirely. > > Co-developed-by: Dan Williams <[email protected]> > Signed-off-by: Dan Williams <[email protected]> > Signed-off-by: Smita Koralahalli <[email protected]> > --- > drivers/cxl/core/region.c | 25 ++++++++++++ > drivers/cxl/cxl.h | 2 + > drivers/dax/cxl.c | 9 +++++ > drivers/dax/hmem/hmem.c | 81 ++++++++++++++++++++++++++++++++++++++- > 4 files changed, 115 insertions(+), 2 deletions(-) > > diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c > index 9827a6dd3187..6c22a2d4abbb 100644 > --- a/drivers/cxl/core/region.c > +++ b/drivers/cxl/core/region.c > @@ -3875,6 +3875,31 @@ static int cxl_region_debugfs_poison_clear(void *data, > u64 offset) > DEFINE_DEBUGFS_ATTRIBUTE(cxl_poison_clear_fops, NULL, > cxl_region_debugfs_poison_clear, "%llx\n"); > > +static int cxl_region_teardown_cb(struct device *dev, void *data) > +{ > + struct cxl_root_decoder *cxlrd; > + struct cxl_region *cxlr; > + struct cxl_port *port; > + > + if (!is_cxl_region(dev)) > + return 0; > + > + cxlr = to_cxl_region(dev); > + > + cxlrd = to_cxl_root_decoder(cxlr->dev.parent); > + port = cxlrd_to_port(cxlrd); > + > + devm_release_action(port->uport_dev, unregister_region, cxlr); > + > + return 0; > +} > + > +void cxl_region_teardown_all(void) > +{ > + bus_for_each_dev(&cxl_bus_type, NULL, NULL, cxl_region_teardown_cb); > +} > +EXPORT_SYMBOL_GPL(cxl_region_teardown_all); > + > static int cxl_region_contains_sr_cb(struct device *dev, void *data) > { > struct resource *res = data; > diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h > index b0ff6b65ea0b..1864d35d5f69 100644 > --- a/drivers/cxl/cxl.h > +++ b/drivers/cxl/cxl.h > @@ -907,6 +907,7 @@ int cxl_add_to_region(struct cxl_endpoint_decoder *cxled); > struct cxl_dax_region 
*to_cxl_dax_region(struct device *dev); > u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint, u64 spa); > bool cxl_region_contains_soft_reserve(const struct resource *res); > +void cxl_region_teardown_all(void); > #else > static inline bool is_cxl_pmem_region(struct device *dev) > { > @@ -933,6 +934,7 @@ static inline bool cxl_region_contains_soft_reserve(const > struct resource *res) > { > return false; > } > +static inline void cxl_region_teardown_all(void) { } > #endif > > void cxl_endpoint_parse_cdat(struct cxl_port *port); > diff --git a/drivers/dax/cxl.c b/drivers/dax/cxl.c > index 13cd94d32ff7..b7e90d6dd888 100644 > --- a/drivers/dax/cxl.c > +++ b/drivers/dax/cxl.c > @@ -14,6 +14,15 @@ static int cxl_dax_region_probe(struct device *dev) > struct dax_region *dax_region; > struct dev_dax_data data; > > + switch (dax_cxl_mode) { > + case DAX_CXL_MODE_DEFER: > + return -EPROBE_DEFER; > + case DAX_CXL_MODE_REGISTER: > + return -ENODEV; > + case DAX_CXL_MODE_DROP: > + break; > + } > + > if (nid == NUMA_NO_NODE) > nid = memory_add_physaddr_to_nid(cxlr_dax->hpa_range.start); > > diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c > index 1e3424358490..bcb57d8678d7 100644 > --- a/drivers/dax/hmem/hmem.c > +++ b/drivers/dax/hmem/hmem.c > @@ -3,6 +3,7 @@ > #include <linux/memregion.h> > #include <linux/module.h> > #include <linux/dax.h> > +#include "../../cxl/cxl.h" > #include "../bus.h" > > static bool region_idle; > @@ -58,9 +59,15 @@ static void release_hmem(void *pdev) > platform_device_unregister(pdev); > } > > +struct dax_defer_work { > + struct platform_device *pdev; > + struct work_struct work; > +}; > + > static int hmem_register_device(struct device *host, int target_nid, > const struct resource *res) > { > + struct dax_defer_work *work = dev_get_drvdata(host); > struct platform_device *pdev; > struct memregion_info info; > long id; > @@ -69,8 +76,18 @@ static int hmem_register_device(struct device *host, int > target_nid, > if 
(IS_ENABLED(CONFIG_DEV_DAX_CXL) && > region_intersects(res->start, resource_size(res), IORESOURCE_MEM, > IORES_DESC_CXL) != REGION_DISJOINT) { > - dev_dbg(host, "deferring range to CXL: %pr\n", res); > - return 0; > + switch (dax_cxl_mode) { > + case DAX_CXL_MODE_DEFER: > + dev_dbg(host, "deferring range to CXL: %pr\n", res); > + schedule_work(&work->work); > + return 0; > + case DAX_CXL_MODE_REGISTER: > + dev_dbg(host, "registering CXL range: %pr\n", res); > + break; > + case DAX_CXL_MODE_DROP: > + dev_dbg(host, "dropping CXL range: %pr\n", res); > + return 0; > + } > } > > rc = region_intersects_soft_reserve(res->start, resource_size(res)); > @@ -123,8 +140,67 @@ static int hmem_register_device(struct device *host, int > target_nid, > return rc; > } > > +static int cxl_contains_soft_reserve(struct device *host, int target_nid, > + const struct resource *res) > +{ > + if (region_intersects(res->start, resource_size(res), IORESOURCE_MEM, > + IORES_DESC_CXL) != REGION_DISJOINT) { > + if (!cxl_region_contains_soft_reserve(res)) > + return 1; > + } > + > + return 0; > +} > + > +static void process_defer_work(struct work_struct *_work) > +{ > + struct dax_defer_work *work = container_of(_work, typeof(*work), work); > + struct platform_device *pdev = work->pdev; > + int rc; > + > + /* relies on cxl_acpi and cxl_pci having had a chance to load */ > + wait_for_device_probe(); > + > + rc = walk_hmem_resources(&pdev->dev, cxl_contains_soft_reserve); > + > + if (!rc) { > + dax_cxl_mode = DAX_CXL_MODE_DROP; > + rc = bus_rescan_devices(&cxl_bus_type); > + if (rc) > + dev_warn(&pdev->dev, "CXL bus rescan failed: %d\n", rc); > + } else { > + dax_cxl_mode = DAX_CXL_MODE_REGISTER; > + cxl_region_teardown_all(); The region teardown appears as a one-shot sweep of existing regions without considering regions not yet assembled. After this point, will a newly arriving region be racing with HMEM again to create a DAX region? 
> + } > + > + walk_hmem_resources(&pdev->dev, hmem_register_device); > +} > + > +static void kill_defer_work(void *_work) > +{ > + struct dax_defer_work *work = container_of(_work, typeof(*work), work); > + > + cancel_work_sync(&work->work); > + kfree(work); > +} > + > static int dax_hmem_platform_probe(struct platform_device *pdev) > { > + struct dax_defer_work *work = kzalloc(sizeof(*work), GFP_KERNEL); > + int rc; > + > + if (!work) > + return -ENOMEM; > + > + work->pdev = pdev; > + INIT_WORK(&work->work, process_defer_work); > + > + rc = devm_add_action_or_reset(&pdev->dev, kill_defer_work, work); > + if (rc) > + return rc; > + > + platform_set_drvdata(pdev, work); > + > return walk_hmem_resources(&pdev->dev, hmem_register_device); > } > > @@ -174,3 +250,4 @@ MODULE_ALIAS("platform:hmem_platform*"); > MODULE_DESCRIPTION("HMEM DAX: direct access to 'specific purpose' memory"); > MODULE_LICENSE("GPL v2"); > MODULE_AUTHOR("Intel Corporation"); > +MODULE_IMPORT_NS("CXL"); > -- > 2.17.1 >

