Hi Jonathan,
On 6/9/2025 5:54 AM, Jonathan Cameron wrote:
On Tue, 3 Jun 2025 22:19:47 +0000
Smita Koralahalli <smita.koralahallichannabasa...@amd.com> wrote:
Reworked from a patch by Alison Schofield <alison.schofi...@intel.com>
Previously, when CXL regions were created through autodiscovery and their
resources overlapped with SOFT RESERVED ranges, the soft reserved resource
remained in place after region teardown. This left the HPA range
unavailable for reuse even after the region was destroyed.
Enhance the logic to reliably remove SOFT RESERVED resources associated
with a region, regardless of alignment or hierarchy in the iomem tree.
Link:
https://lore.kernel.org/linux-cxl/29312c0765224ae76862d59a17748c8188fb95f1.1692638817.git.alison.schofi...@intel.com/
Co-developed-by: Alison Schofield <alison.schofi...@intel.com>
Signed-off-by: Alison Schofield <alison.schofi...@intel.com>
Co-developed-by: Terry Bowman <terry.bow...@amd.com>
Signed-off-by: Terry Bowman <terry.bow...@amd.com>
Signed-off-by: Smita Koralahalli <smita.koralahallichannabasa...@amd.com>
---
drivers/cxl/acpi.c | 2 +
drivers/cxl/core/region.c | 151 ++++++++++++++++++++++++++++++++++++++
drivers/cxl/cxl.h | 5 ++
3 files changed, 158 insertions(+)
diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
index 978f63b32b41..1b1388feb36d 100644
--- a/drivers/cxl/acpi.c
+++ b/drivers/cxl/acpi.c
@@ -823,6 +823,8 @@ static void cxl_softreserv_mem_work_fn(struct work_struct
*work)
* and cxl_mem drivers are loaded.
*/
wait_for_device_probe();
+
+ cxl_region_softreserv_update();
}
static DECLARE_WORK(cxl_sr_work, cxl_softreserv_mem_work_fn);
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 109b8a98c4c7..3a5ca44d65f3 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -3443,6 +3443,157 @@ int cxl_add_to_region(struct cxl_port *root, struct
cxl_endpoint_decoder *cxled)
}
EXPORT_SYMBOL_NS_GPL(cxl_add_to_region, "CXL");
+static int add_soft_reserved(resource_size_t start, resource_size_t len,
+ unsigned long flags)
+{
+ struct resource *res = kmalloc(sizeof(*res), GFP_KERNEL);
+ int rc;
+
+ if (!res)
+ return -ENOMEM;
+
+ *res = DEFINE_RES_MEM_NAMED(start, len, "Soft Reserved");
+
+ res->desc = IORES_DESC_SOFT_RESERVED;
+ res->flags = flags;
I'm a bit doubtful about using a define that restricts a bunch of the
elements, then overriding 2 of them immediately after.
DEFINE_RES_NAMED_DESC(start, len, "Soft Reserved", flags | IORESOURCE_MEM,
IORES_DESC_SOFT_RESERVED);
Sure, I will change to the above.
+ rc = insert_resource(&iomem_resource, res);
+ if (rc) {
+ kfree(res);
+ return rc;
+ }
+
+ return 0;
+}
+
+static void remove_soft_reserved(struct cxl_region *cxlr, struct resource
*soft,
+ resource_size_t start, resource_size_t end)
+{
+ struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
+ resource_size_t new_start, new_end;
+ int rc;
+
+ /* Prevent new usage while removing or adjusting the resource */
+ guard(mutex)(&cxlrd->range_lock);
+
+ /* Aligns at both resource start and end */
+ if (soft->start == start && soft->end == end)
+ goto remove;
+
Might be a clearer flow with else if rather than
a goto.
Okay will change.
+ /* Aligns at either resource start or end */
+ if (soft->start == start || soft->end == end) {
+ if (soft->start == start) {
+ new_start = end + 1;
+ new_end = soft->end;
+ } else {
+ new_start = soft->start;
+ new_end = start - 1;
+ }
+
+ rc = add_soft_reserved(new_start, new_end - new_start + 1,
+ soft->flags);
This is the remnant of what was there before, but the flags are from
the bit we are dropping? That feels odd. They might well be the same
but maybe we need to make that explicit?
Okay. Probably I can update code to clarify this by adding a comment, or
I can also filter or copy only the relevant flag bits if you think
that's more appropriate.
+ if (rc)
+ dev_warn(&cxlr->dev, "cannot add new soft reserved resource
at %pa\n",
+ &new_start);
+
+ /* Remove the original Soft Reserved resource */
+ goto remove;
+ }
+
+ /*
+ * No alignment. Attempt a 3-way split that removes the part of
+ * the resource the region occupied, and then creates new soft
+ * reserved resources for the leading and trailing addr space.
+ */
+ new_start = soft->start;
+ new_end = soft->end;
+
+ rc = add_soft_reserved(new_start, start - new_start, soft->flags);
+ if (rc)
+ dev_warn(&cxlr->dev, "cannot add new soft reserved resource at
%pa\n",
+ &new_start);
+
+ rc = add_soft_reserved(end + 1, new_end - end, soft->flags);
+ if (rc)
+ dev_warn(&cxlr->dev, "cannot add new soft reserved resource at %pa +
1\n",
+ &end);
+
+remove:
+ rc = remove_resource(soft);
+ if (rc)
+ dev_warn(&cxlr->dev, "cannot remove soft reserved resource
%pr\n",
+ soft);
+}
+
+/*
+ * normalize_resource
+ *
+ * The walk_iomem_res_desc() returns a copy of a resource, not a reference
+ * to the actual resource in the iomem_resource tree. As a result,
+ * __release_resource() which relies on pointer equality will fail.
Probably want some statement on why nothing can race with this given
the resource_lock is not being held.
Hmm, probably you are right that normalize_resource() is accessing the
resource tree without holding resource_lock, which could lead to races.
I will update the function to take a read_lock(&resource_lock) before
walking res->parent->child.
Let me know if you'd prefer this locking be handled before calling
normalize_resource() instead.
+ *
+ * This helper walks the children of the resource's parent to find and
+ * return the original resource pointer that matches the given resource's
+ * start and end addresses.
+ *
+ * Return: Pointer to the matching original resource in iomem_resource, or
+ * NULL if not found or invalid input.
+ */
+static struct resource *normalize_resource(struct resource *res)
+{
+ if (!res || !res->parent)
+ return NULL;
+
+ for (struct resource *res_iter = res->parent->child;
+ res_iter != NULL; res_iter = res_iter->sibling) {
I'd move the res_iter != NULL to previous line as it is still under 80 chars.
Sure will fix.
+ if ((res_iter->start == res->start) &&
+ (res_iter->end == res->end))
+ return res_iter;
+ }
+
+ return NULL;
+}
+
+static int __cxl_region_softreserv_update(struct resource *soft,
+ void *_cxlr)
+{
+ struct cxl_region *cxlr = _cxlr;
+ struct resource *res = cxlr->params.res;
+
+ /* Skip non-intersecting soft-reserved regions */
+ if (soft->end < res->start || soft->start > res->end)
+ return 0;
+
+ soft = normalize_resource(soft);
+ if (!soft)
+ return -EINVAL;
+
+ remove_soft_reserved(cxlr, soft, res->start, res->end);
+
+ return 0;
+}
+
+int cxl_region_softreserv_update(void)
+{
+ struct device *dev = NULL;
+
+ while ((dev = bus_find_next_device(&cxl_bus_type, dev))) {
+ struct device *put_dev __free(put_device) = dev;
This __free() is a little bit outside of the usual "constructor and
destructor go together" rules for scope-based cleanup.
I wonder if bus_for_each_dev() is cleaner here or is there a reason
we have to have released the subsystem lock + grabbed the device
one before calling __cxl_region_softreserv_update?
Thanks for the suggestion. I will replace the bus_find_next_device()
with bus_for_each_dev(). I think bus_for_each_dev() simplifies the flow
as there's also no need to call put_device() explicitly.
Thanks
Smita
+ struct cxl_region *cxlr;
+
+ if (!is_cxl_region(dev))
+ continue;
If you stick to bus_find_X I wonder if we should define helpers for
the match function and use bus_find_device()
+
+ cxlr = to_cxl_region(dev);
+
+ walk_iomem_res_desc(IORES_DESC_SOFT_RESERVED,
+ IORESOURCE_MEM, 0, -1, cxlr,
+ __cxl_region_softreserv_update);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_region_softreserv_update, "CXL");