On Thu, May 28, 2026 at 03:13:29PM -0700, Dave Jiang wrote:
>
>
> On 5/23/26 2:43 AM, Anisa Su wrote:
> > Replace the no-op ack stub for cxl_rm_extent() with the real teardown:
> > resolve the released DPA range to its region and endpoint decoder,
> > locate the matching dc_extent in cxlr_dax->dc_extents (filtering by
> > cxled, range containment, and tag), and tear down the entire containing
> > tag group atomically through rm_tag_group(). Partial release is not
> > supported.
> >
> > rm_tag_group() invalidates caches once for the whole group (no mappings
> > exist at this point — partial release is not supported, so all members
> > are leaving together), then walks the group's dc_extents and releases
> > each via its devm action installed at online_tag_group() time.
> >
> > cxl_region_invalidate_memregion() becomes non-static and is declared
> > in core.h so rm_tag_group() can flush caches before tearing the group down.
> >
> > When the released range maps to no region (host crashed before
> > persisting acceptance, region destruction raced device release, or the
> > device is confused) the host has nothing to drop, so reply via
> > memdev_release_extent() to keep the device's view consistent.
> >
> > Based on an original patch by Navneet Singh.
> >
> > Signed-off-by: Ira Weiny <[email protected]>
> > Signed-off-by: Anisa Su <[email protected]>
> >
> > ---
> > Changes:
> > [anisa: restructured from the original "Process dynamic partition
> > events" monolith; this commit replaces the stubbed release with the
> > real walk-and-tear-down of the matching tag group.]
> > ---
> > drivers/cxl/core/core.h | 8 +++
> > drivers/cxl/core/extent.c | 101 ++++++++++++++++++++++++++++++++++++++
> > drivers/cxl/core/mbox.c | 19 -------
> > drivers/cxl/core/region.c | 2 +-
> > 4 files changed, 110 insertions(+), 20 deletions(-)
> >
> > diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
> > index 30b6b05b155b..65daaaadf68e 100644
> > --- a/drivers/cxl/core/core.h
> > +++ b/drivers/cxl/core/core.h
> > @@ -28,6 +28,8 @@ cxled_to_mds(struct cxl_endpoint_decoder *cxled)
> > return container_of(cxlds, struct cxl_memdev_state, cxlds);
> > }
> >
> > +int cxl_region_invalidate_memregion(struct cxl_region *cxlr);
>
> Doesn't this need to go within CONFIG_CXL_REGION?
>
oopsie yes
> > +
> > #ifdef CONFIG_CXL_REGION
> >
> > struct cxl_region_context {
> > @@ -67,6 +69,7 @@ int devm_cxl_add_pmem_region(struct cxl_region *cxlr);
> >
> > int cxl_add_extent(struct cxl_memdev_state *mds, struct cxl_extent *extent,
> > u16 seq_num);
> > +int cxl_rm_extent(struct cxl_memdev_state *mds, struct cxl_extent *extent);
> > int online_tag_group(struct cxl_dc_tag_group *group);
> > #else
> > static inline u64 cxl_dpa_to_hpa(struct cxl_region *cxlr,
> > @@ -79,6 +82,11 @@ static inline int cxl_add_extent(struct cxl_memdev_state
> > *mds,
> > {
> > return 0;
> > }
> > +static inline int cxl_rm_extent(struct cxl_memdev_state *mds,
> > + struct cxl_extent *extent)
> > +{
> > + return 0;
> > +}
> > static inline int online_tag_group(struct cxl_dc_tag_group *group)
> > {
> > return 0;
> > diff --git a/drivers/cxl/core/extent.c b/drivers/cxl/core/extent.c
> > index b01507022cff..51116c8139ed 100644
> > --- a/drivers/cxl/core/extent.c
> > +++ b/drivers/cxl/core/extent.c
> > @@ -344,6 +344,107 @@ static void dc_extent_unregister(void *ext)
> > device_unregister(&dc_extent->dev);
> > }
> >
> > +static void rm_tag_group(struct cxl_dc_tag_group *group)
> > +{
> > + struct device *region_dev = &group->cxlr_dax->dev;
> > + struct dc_extent *dc_extent;
> > + unsigned long index;
> > +
> > + /*
> > + * Tagged allocations release atomically. Invalidate caches once
> > + * for the whole group (no mappings exist at this point — partial
> > + * release is not supported, so all members are leaving use
> > + * together) before tearing down each dc_extent device.
> > + *
> > + * Pin @group across the walk: each devm_release_action runs the
> > + * dc_extent_unregister action synchronously, which drops the last
> > + * reference on the dc_extent device and fires dc_extent_release.
> > + * The release decrements group->nr_extents and, on the final
> > + * decrement, frees @group. Without the pin the next iteration's
> > + * xa_find_after() dereferences a freed xarray.
> > + */
> > + cxl_region_invalidate_memregion(group->cxlr_dax->cxlr);
>
> check return value?
>
done. if invalidate_memregion fails, doesn't release and device needs to
retry
> > +
> > + group->nr_extents++;
> > + xa_for_each(&group->dc_extents, index, dc_extent)
> > + devm_release_action(region_dev, dc_extent_unregister,
> > dc_extent);
> > + group->nr_extents--;
> > + if (!group->nr_extents)
> > + free_tag_group(group);
> > +}
> > +
> > +int cxl_rm_extent(struct cxl_memdev_state *mds, struct cxl_extent *extent)
> > +{
> > + u64 start_dpa = le64_to_cpu(extent->start_dpa);
> > + struct cxl_memdev *cxlmd = mds->cxlds.cxlmd;
> > + struct cxl_endpoint_decoder *cxled;
> > + struct cxl_dax_region *cxlr_dax;
> > + struct cxl_dc_tag_group *group;
> > + struct dc_extent *dc_extent;
> > + struct cxl_region *cxlr;
> > + struct range dpa_range;
> > + unsigned long idx;
> > + uuid_t tag;
> > +
> > + dpa_range = (struct range) {
> > + .start = start_dpa,
> > + .end = start_dpa + le64_to_cpu(extent->length) - 1,
> > + };
> > +
> > + guard(rwsem_read)(&cxl_rwsem.region);
> > + cxlr = cxl_dpa_to_region(cxlmd, start_dpa, &cxled);
> > + if (!cxlr) {
> > + /*
> > + * No region can happen here for a few reasons:
> > + *
> > + * 1) Extents were accepted and the host crashed/rebooted
> > + * leaving them in an accepted state. On reboot the host
> > + * has not yet created a region to own them.
> > + *
> > + * 2) Region destruction won the race with the device releasing
> > + * all the extents. Here the release will be a duplicate of
> > + * the one sent via region destruction.
> > + *
> > + * 3) The device is confused and releasing extents for which no
> > + * region ever existed.
> > + *
> > + * In all these cases make sure the device knows we are not
> > + * using this extent.
> > + */
> > + memdev_release_extent(mds, &dpa_range);
> > + return -ENXIO;
> > + }
> > +
> > + cxlr_dax = cxlr->cxlr_dax;
>
> Does it need to check if cxlr_dax is NULL?
>
added check. same behavior as if !cxlr, bc if !cxlr_dax, we're not
tracking or using any extents, so safe to reply to device with release
> DJ
>
Thanks,
Anisa
> > + import_uuid(&tag, extent->uuid);
> > +
> > + /*
> > + * Find the dc_extent whose DPA range covers the released range and
> > + * whose tag matches. The release targets the entire containing
> > + * tag group atomically; partial release is not supported.
> > + */
> > + group = NULL;
> > + xa_for_each(&cxlr_dax->dc_extents, idx, dc_extent) {
> > + if (dc_extent->cxled != cxled)
> > + continue;
> > + if (!range_contains(&dc_extent->dpa_range, &dpa_range))
> > + continue;
> > + if (!uuid_equal(&dc_extent->group->uuid, &tag))
> > + continue;
> > + group = dc_extent->group;
> > + break;
> > + }
> > + if (!group) {
> > + dev_err(&cxlr_dax->dev,
> > + "release DPA %pra (%pU) matches no dc_extent\n",
> > + &dpa_range, &tag);
> > + return -EINVAL;
> > + }
> > +
> > + rm_tag_group(group);
> > + return 0;
> > +}
> > +
> > static void cleanup_pending_dc_extent(struct dc_extent *dc_extent)
> > {
> > struct cxl_dc_tag_group *group = dc_extent->group;
> > diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
> > index 545c48c9c373..70e6c4c9743c 100644
> > --- a/drivers/cxl/core/mbox.c
> > +++ b/drivers/cxl/core/mbox.c
> > @@ -1587,25 +1587,6 @@ static int handle_add_event(struct cxl_memdev_state
> > *mds,
> > return rc;
> > }
> >
> > -/*
> > - * Stub: ack the release back to the device so it knows we are not
> > - * using the range. A later commit replaces this with the real
> > - * teardown that walks the region's tag group and tears down the
> > - * member dc_extent devices.
> > - */
> > -static int cxl_rm_extent(struct cxl_memdev_state *mds,
> > - struct cxl_extent *extent)
> > -{
> > - u64 start_dpa = le64_to_cpu(extent->start_dpa);
> > - struct range dpa_range = {
> > - .start = start_dpa,
> > - .end = start_dpa + le64_to_cpu(extent->length) - 1,
> > - };
> > -
> > - memdev_release_extent(mds, &dpa_range);
> > - return 0;
> > -}
> > -
> > static char *cxl_dcd_evt_type_str(u8 type)
> > {
> > switch (type) {
> > diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> > index 733d77c07493..317630d8bf2e 100644
> > --- a/drivers/cxl/core/region.c
> > +++ b/drivers/cxl/core/region.c
> > @@ -222,7 +222,7 @@ static struct cxl_region_ref *cxl_rr_load(struct
> > cxl_port *port,
> > return xa_load(&port->regions, (unsigned long)cxlr);
> > }
> >
> > -static int cxl_region_invalidate_memregion(struct cxl_region *cxlr)
> > +int cxl_region_invalidate_memregion(struct cxl_region *cxlr)
> > {
> > if (!cpu_cache_has_invalidate_memregion()) {
> > if (IS_ENABLED(CONFIG_CXL_REGION_INVALIDATION_TEST)) {
>