Re: [PATCH v3 17/25] cxl/core: Return endpoint decoder information from region search
On 8/16/24 7:44 AM, Ira Weiny wrote: > cxl_dpa_to_region() finds the region from a tuple. > The search involves finding the device endpoint decoder as well. > > Dynamic capacity extent processing uses the endpoint decoder HPA > information to calculate the HPA offset. In addition, well behaved > extents should be contained within an endpoint decoder. > > Return the endpoint decoder found to be used in subsequent DCD code. > > Signed-off-by: Ira Weiny Reviewed-by: Dave Jiang > --- > drivers/cxl/core/core.h | 6 -- > drivers/cxl/core/mbox.c | 2 +- > drivers/cxl/core/memdev.c | 4 ++-- > drivers/cxl/core/region.c | 8 +++- > 4 files changed, 14 insertions(+), 6 deletions(-) > > diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h > index 15b6cf1c19ef..76c4153a9b2c 100644 > --- a/drivers/cxl/core/core.h > +++ b/drivers/cxl/core/core.h > @@ -39,7 +39,8 @@ void cxl_decoder_kill_region(struct cxl_endpoint_decoder > *cxled); > int cxl_region_init(void); > void cxl_region_exit(void); > int cxl_get_poison_by_endpoint(struct cxl_port *port); > -struct cxl_region *cxl_dpa_to_region(const struct cxl_memdev *cxlmd, u64 > dpa); > +struct cxl_region *cxl_dpa_to_region(const struct cxl_memdev *cxlmd, u64 dpa, > + struct cxl_endpoint_decoder **cxled); > u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, > u64 dpa); > > @@ -50,7 +51,8 @@ static inline u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, > return ULLONG_MAX; > } > static inline > -struct cxl_region *cxl_dpa_to_region(const struct cxl_memdev *cxlmd, u64 dpa) > +struct cxl_region *cxl_dpa_to_region(const struct cxl_memdev *cxlmd, u64 dpa, > + struct cxl_endpoint_decoder **cxled) > { > return NULL; > } > diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c > index 68c26c4be91a..01a447aaa1b1 100644 > --- a/drivers/cxl/core/mbox.c > +++ b/drivers/cxl/core/mbox.c > @@ -909,7 +909,7 @@ void cxl_event_trace_record(const struct cxl_memdev > *cxlmd, > guard(rwsem_read)(&cxl_dpa_rwsem); > > dpa = le64_to_cpu(evt->media_hdr.phys_addr) & CXL_DPA_MASK; > - cxlr = cxl_dpa_to_region(cxlmd, dpa); > + cxlr = cxl_dpa_to_region(cxlmd, dpa, NULL); > if (cxlr) > hpa = cxl_dpa_to_hpa(cxlr, cxlmd, dpa); > > diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c > index 7da1f0f5711a..12fb07fb89a6 100644 > --- a/drivers/cxl/core/memdev.c > +++ b/drivers/cxl/core/memdev.c > @@ -323,7 +323,7 @@ int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa) > if (rc) > goto out; > > - cxlr = cxl_dpa_to_region(cxlmd, dpa); > + cxlr = cxl_dpa_to_region(cxlmd, dpa, NULL); > if (cxlr) > dev_warn_once(mds->cxlds.dev, > "poison inject dpa:%#llx region: %s\n", dpa, > @@ -387,7 +387,7 @@ int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa) > if (rc) > goto out; > > - cxlr = cxl_dpa_to_region(cxlmd, dpa); > + cxlr = cxl_dpa_to_region(cxlmd, dpa, NULL); > if (cxlr) > dev_warn_once(mds->cxlds.dev, > "poison clear dpa:%#llx region: %s\n", dpa, > diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c > index 35c4a1f4f9bd..8e0884b52f84 100644 > --- a/drivers/cxl/core/region.c > +++ b/drivers/cxl/core/region.c > @@ -2828,6 +2828,7 @@ int cxl_get_poison_by_endpoint(struct cxl_port *port) > struct cxl_dpa_to_region_context { > struct cxl_region *cxlr; > u64 dpa; > + struct cxl_endpoint_decoder *cxled; > }; > > static int __cxl_dpa_to_region(struct device *dev, void *arg) > @@ -2861,11 +2862,13 @@ static int __cxl_dpa_to_region(struct device *dev, > void *arg) > dev_name(dev)); > > ctx->cxlr = cxlr; > + ctx->cxled = cxled; > > return 1; > } > > -struct cxl_region *cxl_dpa_to_region(const struct cxl_memdev *cxlmd, u64 dpa) > +struct cxl_region *cxl_dpa_to_region(const struct cxl_memdev *cxlmd, u64 dpa, > + struct cxl_endpoint_decoder **cxled) > { > struct cxl_dpa_to_region_context ctx; > struct cxl_port *port; > @@ -2877,6 +2880,9 @@ struct cxl_region *cxl_dpa_to_region(const struct > cxl_memdev *cxlmd, u64 dpa) > if (port && is_cxl_endpoint(port) && cxl_num_decoders_committed(port)) > device_for_each_child(&port->dev, &ctx, __cxl_dpa_to_region); > > + if (cxled) > + *cxled = ctx.cxled; > + > return ctx.cxlr; > } > >
Re: [PATCH v3 18/25] cxl/extent: Process DCD events and realize region extents
On 8/16/24 7:44 AM, ira.we...@intel.com wrote: > From: Navneet Singh > > A dynamic capacity device (DCD) sends events to signal the host for > changes in the availability of Dynamic Capacity (DC) memory. These > events contain extents describing a DPA range and meta data for memory > to be added or removed. Events may be sent from the device at any time. > > Three types of events can be signaled, Add, Release, and Force Release. > > On add, the host may accept or reject the memory being offered. If no > region exists, or the extent is invalid, the extent should be rejected. > Add extent events may be grouped by a 'more' bit which indicates those > extents should be processed as a group. > > On remove, the host can delay the response until the host is safely not > using the memory. If no region exists the release can be sent > immediately. The host may also release extents (or partial extents) at > any time. Thus the 'more' bit grouping of release events is of less > value and can be ignored in favor of sending multiple release capacity > responses for groups of release events. > > Force removal is intended as a mechanism between the FM and the device > and intended only when the host is unresponsive, out of sync, or > otherwise broken. Purposely ignore force removal events. > > Regions are made up of one or more devices which may be surfacing memory > to the host. Once all devices in a region have surfaced an extent the > region can expose a corresponding extent for the user to consume. > Without interleaving a device extent forms a 1:1 relationship with the > region extent. Immediately surface a region extent upon getting a > device extent. > > Per the specification the device is allowed to offer or remove extents > at any time. However, anticipated use cases can expect extents to be > offered, accepted, and removed in well defined chunks. > > Simplify extent tracking with the following restrictions. > > 1) Flag for removal any extent which overlaps a requested > release range. > 2) Refuse the offer of extents which overlap already accepted > memory ranges. > 3) Accept again a range which has already been accepted by the > host. (It is likely the device has an error because it > should already know that this range was accepted. But from > the host point of view it is safe to acknowledge that > acceptance again.) > > Management of the region extent devices must be synchronized with > potential uses of the memory within the DAX layer. Create region extent > devices as children of the cxl_dax_region device such that the DAX > region driver can co-drive them and synchronize with the DAX layer. > Synchronization and management is handled in a subsequent patch. > > Process DCD events and create region devices. > > Signed-off-by: Navneet Singh > Co-developed-by: Ira Weiny > Signed-off-by: Ira Weiny A few nits below, but in general Reviewed-by: Dave Jiang > > --- > Changes: > [iweiny: combine this with the extent surface patches to better show the > lifetime extent objects in review] > [iweiny: clean up commit message.] > [iweiny: move extent verification of the 'read extents on region > creation' to this patch] > [iweiny: Provide for a common path for extent realization between an add >event and adding existing extents.] > [iweiny: Persist a check that an extent is within an endpoint decoder] > [iweiny: reduce exported and non-static calls] > [iweiny: use %par] > > > > [Jonathan: implement the more bit with a simple algorithm which accepts > all extents it can. > Also include the response more bit to prevent payload > overflow] > [Fan: Do not error if a contained extent is added.] > [Jonathan: allocate ida after kzalloc] > [iweiny: fix ida resource leak] > [fan/djiang: remove unneeded memset] > [djiang: fix indentation] > [Jonathan: Fix indentation] > [Jonathan/djbw: make tag a uuid] > [djbw: create helper calc_hpa_range() straight away] > [djbw: Allow for multiple cxled_extents per region_extent] > [djbw: s/cxl_ed/cxled] > [djbw: s/cxl_release_ed_extent/cxled_release_extent/] > [djbw: s/reg_ext/region_extent/] > [djbw: s/dc_extent/extent/] > [Gregory/djbw: reject shared extents] > [iweiny: predicate extent.c compile on CONFIG_CXL_REGION] > --- > drivers/cxl/core/Makefile | 2 +- > drivers/cxl/core/core.h | 13 ++ > drivers/cxl/core/extent.c | 345 > ++ > drivers/cxl/core/mbox.c | 268 ++- > drivers/cxl/core/region.c | 6 + > drivers/cxl/cxl.h | 52 ++- > drivers/cxl/cxlmem.h | 26 > include/linux/cxl-event.h | 32 + > tools/testing/cxl/Kbuild | 3 +- > 9 files changed, 743 insertions(+), 4 deletions(-) > > diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile > index 9259bcc6773c..3b812515e725 100644 > --- a/drive
Re: [PATCH v3 19/25] cxl/region/extent: Expose region extent information in sysfs
On 8/16/24 7:44 AM, ira.we...@intel.com wrote: > From: Navneet Singh > > Extent information can be helpful to the user to coordinate memory usage > with the external orchestrator and FM. > > Expose the details of region extents by creating the following > sysfs entries. > > /sys/bus/cxl/devices/dax_regionX/extentX.Y > /sys/bus/cxl/devices/dax_regionX/extentX.Y/offset > /sys/bus/cxl/devices/dax_regionX/extentX.Y/length > /sys/bus/cxl/devices/dax_regionX/extentX.Y/tag > > Signed-off-by: Navneet Singh > Co-developed-by: Ira Weiny > Signed-off-by: Ira Weiny > > --- > Changes: > [iweiny: split this out] > [Jonathan: add documentation for extent sysfs] > [Jonathan/djbw: s/label/tag] > [Jonathan/djbw: treat tag as uuid] > [djbw: use __ATTRIBUTE_GROUPS] > [djbw: make tag invisible if it is empty] > [djbw/iweiny: use conventional id names for extents; extentX.Y] > --- > Documentation/ABI/testing/sysfs-bus-cxl | 13 > drivers/cxl/core/extent.c | 58 > + > 2 files changed, 71 insertions(+) > > diff --git a/Documentation/ABI/testing/sysfs-bus-cxl > b/Documentation/ABI/testing/sysfs-bus-cxl > index 3a5ee88e551b..e97e6a73c960 100644 > --- a/Documentation/ABI/testing/sysfs-bus-cxl > +++ b/Documentation/ABI/testing/sysfs-bus-cxl > @@ -599,3 +599,16 @@ Description: > See Documentation/ABI/stable/sysfs-devices-node. access0 > provides > the number to the closest initiator and access1 provides the > number to the closest CPU. > + > +What:/sys/bus/cxl/devices/dax_regionX/extentX.Y/offset > + /sys/bus/cxl/devices/dax_regionX/extentX.Y/length > + /sys/bus/cxl/devices/dax_regionX/extentX.Y/tag I wonder consider an entry for each with their own descriptions, which seems to be the standard practice. DJ > +Date:October, 2024 > +KernelVersion: v6.12 > +Contact: linux-...@vger.kernel.org > +Description: > + (RO) [For Dynamic Capacity regions only] Extent offset and > + length within the region. Users can use the extent information > + to create DAX devices on specific extents. This is done by > + creating and destroying DAX devices in specific sequences and > + looking at the mappings created. > diff --git a/drivers/cxl/core/extent.c b/drivers/cxl/core/extent.c > index 34456594cdc3..d7d526a51e2b 100644 > --- a/drivers/cxl/core/extent.c > +++ b/drivers/cxl/core/extent.c > @@ -6,6 +6,63 @@ > > #include "core.h" > > +static ssize_t offset_show(struct device *dev, struct device_attribute *attr, > +char *buf) > +{ > + struct region_extent *region_extent = to_region_extent(dev); > + > + return sysfs_emit(buf, "%#llx\n", region_extent->hpa_range.start); > +} > +static DEVICE_ATTR_RO(offset); > + > +static ssize_t length_show(struct device *dev, struct device_attribute *attr, > +char *buf) > +{ > + struct region_extent *region_extent = to_region_extent(dev); > + u64 length = range_len(®ion_extent->hpa_range); > + > + return sysfs_emit(buf, "%#llx\n", length); > +} > +static DEVICE_ATTR_RO(length); > + > +static ssize_t tag_show(struct device *dev, struct device_attribute *attr, > + char *buf) > +{ > + struct region_extent *region_extent = to_region_extent(dev); > + > + return sysfs_emit(buf, "%pUb\n", ®ion_extent->tag); > +} > +static DEVICE_ATTR_RO(tag); > + > +static struct attribute *region_extent_attrs[] = { > + &dev_attr_offset.attr, > + &dev_attr_length.attr, > + &dev_attr_tag.attr, > + NULL, > +}; > + > +static uuid_t empty_tag = { 0 }; > + > +static umode_t region_extent_visible(struct kobject *kobj, > + struct attribute *a, int n) > +{ > + struct device *dev = kobj_to_dev(kobj); > + struct region_extent *region_extent = to_region_extent(dev); > + > + if (a == &dev_attr_tag.attr && > + uuid_equal(®ion_extent->tag, &empty_tag)) > + return 0; > + > + return a->mode; > +} > + > +static const struct attribute_group region_extent_attribute_group = { > + .attrs = region_extent_attrs, > + .is_visible = region_extent_visible, > +}; > + > +__ATTRIBUTE_GROUPS(region_extent_attribute); > + > static void cxled_release_extent(struct cxl_endpoint_decoder *cxled, >struct cxled_extent *ed_extent) > { > @@ -44,6 +101,7 @@ static void region_extent_release(struct device *dev) > static const struct device_type region_extent_type = { > .name = "extent", > .release = region_extent_release, > + .groups = region_extent_attribute_groups, > }; > > bool is_region_extent(struct device *dev) >
Re: [PATCH v3 20/25] dax/bus: Factor out dev dax resize logic
On 8/16/24 7:44 AM, Ira Weiny wrote: > Dynamic Capacity regions must limit dev dax resources to those areas > which have extents backing real memory. Such DAX regions are dubbed > 'sparse' regions. In order to manage where memory is available four > alternatives were considered: > > 1) Create a single region resource child on region creation which >reserves the entire region. Then as extents are added punch holes in >this reservation. This requires new resource manipulation to punch >the holes and still requires an additional iteration over the extent >areas which may already have existing dev dax resources used. > > 2) Maintain an ordered xarray of extents which can be queried while >processing the resize logic. The issue is that existing region->res >children may artificially limit the allocation size sent to >alloc_dev_dax_range(). IE the resource children can't be directly >used in the resize logic to find where space in the region is. This >also poses a problem of managing the available size in 2 places. > > 3) Maintain a separate resource tree with extents. This option is the >same as 2) but with the different data structure. Most ideally there >should be a unified representation of the resource tree not two places >to look for space. > > 4) Create region resource children for each extent. Manage the dax dev >resize logic in the same way as before but use a region child >(extent) resource as the parents to find space within each extent. > > Option 4 can leverage the existing resize algorithm to find space within > the extents. It manages the available space in a singular resource tree > which is less complicated for finding space. > > In preparation for this change, factor out the dev_dax_resize logic. > For static regions use dax_region->res as the parent to find space for > the dax ranges. Future patches will use the same algorithm with > individual extent resources as the parent. > > Signed-off-by: Ira Weiny Reviewed-by: Dave Jiang > --- > Changes: > [iweiny: Rebase on new DAX region locking] > [iweiny: Reword commit message] > [iweiny: Drop reviews] > --- > drivers/dax/bus.c | 129 > +- > 1 file changed, 79 insertions(+), 50 deletions(-) > > diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c > index d8cb5195a227..975860371d9f 100644 > --- a/drivers/dax/bus.c > +++ b/drivers/dax/bus.c > @@ -844,11 +844,9 @@ static int devm_register_dax_mapping(struct dev_dax > *dev_dax, int range_id) > return 0; > } > > -static int alloc_dev_dax_range(struct dev_dax *dev_dax, u64 start, > - resource_size_t size) > +static int alloc_dev_dax_range(struct resource *parent, struct dev_dax > *dev_dax, > +u64 start, resource_size_t size) > { > - struct dax_region *dax_region = dev_dax->region; > - struct resource *res = &dax_region->res; > struct device *dev = &dev_dax->dev; > struct dev_dax_range *ranges; > unsigned long pgoff = 0; > @@ -866,14 +864,14 @@ static int alloc_dev_dax_range(struct dev_dax *dev_dax, > u64 start, > return 0; > } > > - alloc = __request_region(res, start, size, dev_name(dev), 0); > + alloc = __request_region(parent, start, size, dev_name(dev), 0); > if (!alloc) > return -ENOMEM; > > ranges = krealloc(dev_dax->ranges, sizeof(*ranges) > * (dev_dax->nr_range + 1), GFP_KERNEL); > if (!ranges) { > - __release_region(res, alloc->start, resource_size(alloc)); > + __release_region(parent, alloc->start, resource_size(alloc)); > return -ENOMEM; > } > > @@ -1026,50 +1024,45 @@ static bool adjust_ok(struct dev_dax *dev_dax, struct > resource *res) > return true; > } > > -static ssize_t dev_dax_resize(struct dax_region *dax_region, > - struct dev_dax *dev_dax, resource_size_t size) > +/** > + * dev_dax_resize_static - Expand the device into the unused portion of the > + * region. This may involve adjusting the end of an existing resource, or > + * allocating a new resource. > + * > + * @parent: parent resource to allocate this range in > + * @dev_dax: DAX device to be expanded > + * @to_alloc: amount of space to alloc; must be <= space available in @parent > + * > + * Return the amount of space allocated or -ERRNO on failure > + */ > +static ssize_t dev_dax_resize_static(struct resource *parent, > + struct dev_dax *dev_dax, > + resource_size_t to_alloc) > { > - resource_size_t avail = dax_region_avail_size(dax_region), to_alloc; > - resource_size_t dev_size = dev_dax_size(dev_dax); > - struct resource *region_res = &dax_region->res; > - struct device *dev = &dev_dax->dev; > struct resource *res, *first; > - resource_size_t alloc = 0; > int rc; >
Re: [PATCH v3 21/25] dax/region: Create resources on sparse DAX regions
On 8/16/24 7:44 AM, ira.we...@intel.com wrote: > From: Navneet Singh > > DAX regions which map dynamic capacity partitions require that memory be > allowed to come and go. Recall sparse regions were created for this > purpose. Now that extents can be realized within DAX regions the DAX > region driver can start tracking sub-resource information. > > The tight relationship between DAX region operations and extent > operations require memory changes to be controlled synchronously with > the user of the region. Synchronize through the dax_region_rwsem and by > having the region driver drive both the region device as well as the > extent sub-devices. > > Recall requests to remove extents can happen at any time and that a host > is not obligated to release the memory until it is not being used. If > an extent is not used allow a release response. > > The DAX layer has no need for the details of the CXL memory extent > devices. Expose extents to the DAX layer as device children of the DAX > region device. A single callback from the driver aids the DAX layer to > determine if the child device is an extent. The DAX layer also > registers a devres function to automatically clean up when the device is > removed from the region. > > There is a race between extents being surfaced and the dax_cxl driver > being loaded. The driver must therefore scan for any existing extents > while still under the device lock. > > Respond to extent notifications. Manage the DAX region resource tree > based on the extents lifetime. Return the status of remove > notifications to lower layers such that it can manage the hardware > appropriately. > > Signed-off-by: Navneet Singh > Co-developed-by: Ira Weiny > Signed-off-by: Ira Weiny > > --- > Changes: > [iweiny: patch reorder] > [iweiny: move hunks from other patches to clarify code changes and > add/release flows WRT dax regions] > [iweiny: use %par] > [iweiny: clean up variable names] > [iweiny: Simplify sparse_ops] > [Fan: avoid open coding range_len()] > [djbw: s/reg_ext/region_extent] > --- > drivers/cxl/core/extent.c | 76 +-- > drivers/cxl/cxl.h | 6 ++ > drivers/dax/bus.c | 243 > +- > drivers/dax/bus.h | 3 +- > drivers/dax/cxl.c | 63 +++- > drivers/dax/dax-private.h | 34 +++ > drivers/dax/hmem/hmem.c | 2 +- > drivers/dax/pmem.c| 2 +- > 8 files changed, 391 insertions(+), 38 deletions(-) > > diff --git a/drivers/cxl/core/extent.c b/drivers/cxl/core/extent.c > index d7d526a51e2b..103b0bec3a4a 100644 > --- a/drivers/cxl/core/extent.c > +++ b/drivers/cxl/core/extent.c > @@ -271,20 +271,67 @@ static void calc_hpa_range(struct cxl_endpoint_decoder > *cxled, > hpa_range->end = hpa_range->start + range_len(dpa_range) - 1; > } > > +static int cxlr_notify_extent(struct cxl_region *cxlr, enum dc_event event, > + struct region_extent *region_extent) > +{ > + struct cxl_dax_region *cxlr_dax; > + struct device *dev; > + int rc = 0; > + > + cxlr_dax = cxlr->cxlr_dax; > + dev = &cxlr_dax->dev; > + dev_dbg(dev, "Trying notify: type %d HPA %par\n", > + event, ®ion_extent->hpa_range); > + > + /* > + * NOTE the lack of a driver indicates a notification has failed. No > + * user space coordiantion was possible. > + */ > + device_lock(dev); > + if (dev->driver) { > + struct cxl_driver *driver = to_cxl_drv(dev->driver); > + struct cxl_notify_data notify_data = (struct cxl_notify_data) { > + .event = event, > + .region_extent = region_extent, > + }; > + > + if (driver->notify) { > + dev_dbg(dev, "Notify: type %d HPA %par\n", > + event, ®ion_extent->hpa_range); > + rc = driver->notify(dev, ¬ify_data); > + } > + } > + device_unlock(dev); Maybe a cleaner version: guard(device)(dev); if (!dev->driver || !dev->driver->notify) return 0; dev_dbg(...); return driver->notify(dev, ¬ify_data); > + return rc; > +} > + > +struct rm_data { > + struct cxl_region *cxlr; > + struct range *range; > +}; > + > static int cxlr_rm_extent(struct device *dev, void *data) > { > struct region_extent *region_extent = to_region_extent(dev); > - struct range *region_hpa_range = data; > + struct rm_data *rm_data = data; > + int rc; > > if (!region_extent) > return 0; > > /* > - * Any extent which 'touches' the released range is removed. > + * Any extent which 'touches' the released range is attempted to be > + * removed. >*/ > - if (range_overlaps(region_hpa_range, ®ion_extent->hpa_range)) { > + if (range_overlaps(rm_data->range, ®ion_extent->hpa_range)) { > +
Re: [PATCH v3 22/25] cxl/region: Read existing extents on region creation
On 8/16/24 7:44 AM, ira.we...@intel.com wrote: > From: Navneet Singh > > Dynamic capacity device extents may be left in an accepted state on a > device due to an unexpected host crash. In this case it is expected > that the creation of a new region on top of a DC partition can read > those extents and surface them for continued use. > > Once all endpoint decoders are part of a region and the region is being > realized a read of the devices extent list can reveal these previously > accepted extents. Once all endpoint decoders are part of a region and the region is being realized, a read of the 'devices extend list' can reveal these previously accepted extents. > > CXL r3.1 specifies the mailbox call Get Dynamic Capacity Extent List for > this purpose. The call returns all the extents for all dynamic capacity > partitions. If the fabric manager is adding extents to any DCD > partition, the extent list for the recovered region may change. In this > case the query must retry. Upon retry the query could encounter extents > which were accepted on a previous list query. Adding such extents is > ignored without error because they are entirely within a previous > accepted extent. > > The scan for existing extents races with the dax_cxl driver. This is > synchronized through the region device lock. Extents which are found > after the driver has loaded will surface through the normal notification > path while extents seen prior to the driver are read during driver load. > > Signed-off-by: Navneet Singh > Co-developed-by: Ira Weiny > Signed-off-by: Ira Weiny > > --- > Changes: > [iweiny: Leverage the new add path from the event processing code such >that the adding and surfacing of extents flows through the same >code path for both event processing and existing extents. >While this does validate existing extents again on start up >this is an error recovery case / new boot scenario and should >not cause any major issues while making the code more >straight forward and maintainable.] > > [iweiny: use %par] > [iweiny: rebase] > [iweiny: Move this patch later in the series such that the realization > of extents can go through the same path as an add event] > [Fan: Issue a retry if the gen number changes] > [djiang: s/uint64_t/u64/] > [djiang: update function names] > [Jørgen/djbw: read the generation and total count on first iteration of > the Get Extent List call] > [djbw: s/cxl_mbox_get_dc_extent_in/cxl_mbox_get_extent_in/] > [djbw: s/cxl_mbox_get_dc_extent_out/cxl_mbox_get_extent_out/] > [djbw/iweiny: s/cxl_read_dc_extents/cxl_read_extent_list] > --- > drivers/cxl/core/core.h | 2 + > drivers/cxl/core/mbox.c | 100 > ++ > drivers/cxl/core/region.c | 12 ++ > drivers/cxl/cxlmem.h | 21 ++ > 4 files changed, 135 insertions(+) > > diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h > index 8dfc97b2e0a4..9e54064a6f48 100644 > --- a/drivers/cxl/core/core.h > +++ b/drivers/cxl/core/core.h > @@ -21,6 +21,8 @@ cxled_to_mds(struct cxl_endpoint_decoder *cxled) > return container_of(cxlds, struct cxl_memdev_state, cxlds); > } > > +void cxl_read_extent_list(struct cxl_endpoint_decoder *cxled); > + > #ifdef CONFIG_CXL_REGION > extern struct device_attribute dev_attr_create_pmem_region; > extern struct device_attribute dev_attr_create_ram_region; > diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c > index f629ad7488ac..d43ac8eabf56 100644 > --- a/drivers/cxl/core/mbox.c > +++ b/drivers/cxl/core/mbox.c > @@ -1670,6 +1670,106 @@ int cxl_dev_dynamic_capacity_identify(struct > cxl_memdev_state *mds) > } > EXPORT_SYMBOL_NS_GPL(cxl_dev_dynamic_capacity_identify, CXL); > > +/* Return -EAGAIN if the extent list changes while reading */ > +static int __cxl_read_extent_list(struct cxl_endpoint_decoder *cxled) > +{ > + u32 current_index, total_read, total_expected, initial_gen_num; > + struct cxl_memdev_state *mds = cxled_to_mds(cxled); > + struct device *dev = mds->cxlds.dev; > + struct cxl_mbox_cmd mbox_cmd; > + u32 max_extent_count; > + bool first = true; > + > + struct cxl_mbox_get_extent_out *extents __free(kfree) = > + kvmalloc(mds->payload_size, GFP_KERNEL); > + if (!extents) > + return -ENOMEM; > + > + total_read = 0; > + current_index = 0; > + total_expected = 0; > + max_extent_count = (mds->payload_size - sizeof(*extents)) / > + sizeof(struct cxl_extent); > + do { > + struct cxl_mbox_get_extent_in get_extent; > + u32 nr_returned, current_total, current_gen_num; > + int rc; > + > + get_extent = (struct cxl_mbox_get_extent_in) { > + .extent_cnt = max(max_extent_count, > + total_expected - current_index), > +