On 8/16/24 7:44 AM, ira.we...@intel.com wrote:
> From: Navneet Singh <navneet.si...@intel.com>
>
> DAX regions which map dynamic capacity partitions require that memory be
> allowed to come and go. Recall that sparse regions were created for this
> purpose. Now that extents can be realized within DAX regions, the DAX
> region driver can start tracking sub-resource information.
>
> The tight relationship between DAX region operations and extent
> operations requires memory changes to be controlled synchronously with
> the user of the region. Synchronize through the dax_region_rwsem and by
> having the region driver drive both the region device as well as the
> extent sub-devices.
>
> Recall that requests to remove extents can happen at any time and that a
> host is not obligated to release the memory until it is no longer in use.
> If an extent is not in use, allow a release response.
>
> The DAX layer has no need for the details of the CXL memory extent
> devices. Expose extents to the DAX layer as device children of the DAX
> region device. A single callback from the driver lets the DAX layer
> determine if the child device is an extent. The DAX layer also
> registers a devres function to automatically clean up when the device is
> removed from the region.
>
> There is a race between extents being surfaced and the dax_cxl driver
> being loaded. The driver must therefore scan for any existing extents
> while still under the device lock.
>
> Respond to extent notifications. Manage the DAX region resource tree
> based on extent lifetimes. Return the status of remove
> notifications to the lower layers so that they can manage the hardware
> appropriately.
>
> Signed-off-by: Navneet Singh <navneet.si...@intel.com>
> Co-developed-by: Ira Weiny <ira.we...@intel.com>
> Signed-off-by: Ira Weiny <ira.we...@intel.com>
>
> ---
> Changes:
> [iweiny: patch reorder]
> [iweiny: move hunks from other patches to clarify code changes and
> add/release flows WRT dax regions]
> [iweiny: use %par]
> [iweiny: clean up variable names]
> [iweiny: Simplify sparse_ops]
> [Fan: avoid open coding range_len()]
> [djbw: s/reg_ext/region_extent]
> ---
> drivers/cxl/core/extent.c | 76 +++++++++++++--
> drivers/cxl/cxl.h | 6 ++
> drivers/dax/bus.c | 243 +++++++++++++++++++++++++++++++++++++++++-----
> drivers/dax/bus.h | 3 +-
> drivers/dax/cxl.c | 63 +++++++++++-
> drivers/dax/dax-private.h | 34 +++++++
> drivers/dax/hmem/hmem.c | 2 +-
> drivers/dax/pmem.c | 2 +-
> 8 files changed, 391 insertions(+), 38 deletions(-)
>
> diff --git a/drivers/cxl/core/extent.c b/drivers/cxl/core/extent.c
> index d7d526a51e2b..103b0bec3a4a 100644
> --- a/drivers/cxl/core/extent.c
> +++ b/drivers/cxl/core/extent.c
> @@ -271,20 +271,67 @@ static void calc_hpa_range(struct cxl_endpoint_decoder *cxled,
> hpa_range->end = hpa_range->start + range_len(dpa_range) - 1;
> }
>
> +static int cxlr_notify_extent(struct cxl_region *cxlr, enum dc_event event,
> + struct region_extent *region_extent)
> +{
> + struct cxl_dax_region *cxlr_dax;
> + struct device *dev;
> + int rc = 0;
> +
> + cxlr_dax = cxlr->cxlr_dax;
> + dev = &cxlr_dax->dev;
> + dev_dbg(dev, "Trying notify: type %d HPA %par\n",
> + event, ®ion_extent->hpa_range);
> +
> + /*
> + * NOTE: the lack of a driver indicates a notification has failed. No
> + * user space coordination was possible.
> + */
> + device_lock(dev);
> + if (dev->driver) {
> + struct cxl_driver *driver = to_cxl_drv(dev->driver);
> + struct cxl_notify_data notify_data = (struct cxl_notify_data) {
> + .event = event,
> + .region_extent = region_extent,
> + };
> +
> + if (driver->notify) {
> + dev_dbg(dev, "Notify: type %d HPA %par\n",
> + event, &region_extent->hpa_range);
> + rc = driver->notify(dev, &notify_data);
> + }
> + }
> + device_unlock(dev);
Maybe a cleaner version:

	guard(device)(dev);
	if (!dev->driver)
		return 0;

	struct cxl_driver *driver = to_cxl_drv(dev->driver);
	if (!driver->notify)
		return 0;

	dev_dbg(...);
	return driver->notify(dev, &notify_data);
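notify_data would just need to be set up ahead of the guard rather than
inside the if block.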
> + return rc;
> +}
> +
> +struct rm_data {
> + struct cxl_region *cxlr;
> + struct range *range;
> +};
> +
> static int cxlr_rm_extent(struct device *dev, void *data)
> {
> struct region_extent *region_extent = to_region_extent(dev);
> - struct range *region_hpa_range = data;
> + struct rm_data *rm_data = data;
> + int rc;
>
> if (!region_extent)
> return 0;
>
> /*
> - * Any extent which 'touches' the released range is removed.
> + * Attempt to remove any extent which 'touches' the released
> + * range.
> */
> - if (range_overlaps(region_hpa_range, &region_extent->hpa_range)) {
> + if (range_overlaps(rm_data->range, &region_extent->hpa_range)) {
> + struct cxl_region *cxlr = rm_data->cxlr;
> +
> dev_dbg(dev, "Remove region extent HPA %par\n",
> ®ion_extent->hpa_range);
> + rc = cxlr_notify_extent(cxlr, DCD_RELEASE_CAPACITY, region_extent);
> + if (rc == -EBUSY)
> + return 0;
> + /* Extent not in use or error, remove it */
> region_rm_extent(region_extent);
> }
> return 0;
> @@ -312,8 +359,13 @@ int cxl_rm_extent(struct cxl_memdev_state *mds, struct cxl_extent *extent)
>
> calc_hpa_range(cxled, cxlr->cxlr_dax, &dpa_range, &hpa_range);
>
> + struct rm_data rm_data = {
> + .cxlr = cxlr,
> + .range = &hpa_range,
> + };
> +
> /* Remove region extents which overlap */
> - return device_for_each_child(&cxlr->cxlr_dax->dev, &hpa_range,
> + return device_for_each_child(&cxlr->cxlr_dax->dev, &rm_data,
> cxlr_rm_extent);
> }
>
> @@ -338,8 +390,20 @@ static int cxlr_add_extent(struct cxl_dax_region *cxlr_dax,
> return rc;
> }
>
> - /* device model handles freeing region_extent */
> - return online_region_extent(region_extent);
> + rc = online_region_extent(region_extent);
> + /* device model handled freeing region_extent */
> + if (rc)
> + return rc;
> +
> + rc = cxlr_notify_extent(cxlr_dax->cxlr, DCD_ADD_CAPACITY, region_extent);
> + /*
> + * The region device was briefly live but the DAX layer ensures it
> + * was not used.
> + */
> + if (rc)
> + region_rm_extent(region_extent);
> +
> + return rc;
> }
>
> /* Callers are expected to ensure cxled has been attached to a region */
> diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
> index c858e3957fd5..9abbfc68c6ad 100644
> --- a/drivers/cxl/cxl.h
> +++ b/drivers/cxl/cxl.h
> @@ -916,10 +916,16 @@ bool is_cxl_region(struct device *dev);
>
> extern struct bus_type cxl_bus_type;
>
> +struct cxl_notify_data {
> + enum dc_event event;
> + struct region_extent *region_extent;
> +};
> +
> struct cxl_driver {
> const char *name;
> int (*probe)(struct device *dev);
> void (*remove)(struct device *dev);
> + int (*notify)(struct device *dev, struct cxl_notify_data *notify_data);
> struct device_driver drv;
> int id;
> };
> diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
> index 975860371d9f..f14b0cfa7edd 100644
> --- a/drivers/dax/bus.c
> +++ b/drivers/dax/bus.c
> @@ -183,6 +183,83 @@ static bool is_sparse(struct dax_region *dax_region)
> return (dax_region->res.flags & IORESOURCE_DAX_SPARSE_CAP) != 0;
> }
>
> +static void __dax_release_resource(struct dax_resource *dax_resource)
> +{
> + struct dax_region *dax_region = dax_resource->region;
> +
> + lockdep_assert_held_write(&dax_region_rwsem);
> + dev_dbg(dax_region->dev, "Extent release resource %pr\n",
> + dax_resource->res);
> + if (dax_resource->res)
> + __release_region(&dax_region->res, dax_resource->res->start,
> + resource_size(dax_resource->res));
> + dax_resource->res = NULL;
> +}
> +
> +static void dax_release_resource(void *res)
> +{
> + struct dax_resource *dax_resource = res;
> +
> + guard(rwsem_write)(&dax_region_rwsem);
> + __dax_release_resource(dax_resource);
> + kfree(dax_resource);
> +}
> +
> +int dax_region_add_resource(struct dax_region *dax_region,
> + struct device *device,
> + resource_size_t start, resource_size_t length)
>
kdoc header?
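
Maybe something like this (descriptions are just my reading of the code, so
take the wording as a sketch):

	/**
	 * dax_region_add_resource() - track an extent resource under a
	 *			       sparse DAX region
	 * @dax_region: the region to add the resource to
	 * @device: extent device; carries the dax_resource as driver data
	 * @start: start address of the resource
	 * @length: length of the resource
	 *
	 * Returns 0 on success, -ENOMEM/-ENOSPC on failure.
	 */
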
> +{
> + struct resource *new_resource;
> + int rc;
> +
> + struct dax_resource *dax_resource __free(kfree) =
> + kzalloc(sizeof(*dax_resource), GFP_KERNEL);
> + if (!dax_resource)
> + return -ENOMEM;
> +
> + guard(rwsem_write)(&dax_region_rwsem);
> +
> + dev_dbg(dax_region->dev, "DAX region resource %pr\n", &dax_region->res);
> + new_resource = __request_region(&dax_region->res, start, length, "extent", 0);
> + if (!new_resource) {
> + dev_err(dax_region->dev, "Failed to add region s:%pa l:%pa\n",
> + &start, &length);
> + return -ENOSPC;
> + }
> +
> + dev_dbg(dax_region->dev, "add resource %pr\n", new_resource);
> + dax_resource->region = dax_region;
> + dax_resource->res = new_resource;
> + dev_set_drvdata(device, dax_resource);
> + rc = devm_add_action_or_reset(device, dax_release_resource,
> + no_free_ptr(dax_resource));
> + /* On error, ensure driver data is cleared under the semaphore */
> + if (rc)
> + dev_set_drvdata(device, NULL);
> + return rc;
> +}
> +EXPORT_SYMBOL_GPL(dax_region_add_resource);
> +
> +int dax_region_rm_resource(struct dax_region *dax_region,
> + struct device *dev)
kdoc header
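
Same here; a rough sketch, wording only a suggestion:

	/**
	 * dax_region_rm_resource() - release an extent resource from a
	 *			      sparse DAX region
	 * @dax_region: the region the resource was added to
	 * @dev: extent device carrying the dax_resource as driver data
	 *
	 * Returns 0 on success, or -EBUSY if the resource is still in use.
	 */
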
> +{
> + struct dax_resource *dax_resource;
> +
> + guard(rwsem_write)(&dax_region_rwsem);
> +
> + dax_resource = dev_get_drvdata(dev);
> + if (!dax_resource)
> + return 0;
> +
> + if (dax_resource->use_cnt)
> + return -EBUSY;
> +
> + /* avoid races with users trying to use the extent */
> + __dax_release_resource(dax_resource);
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(dax_region_rm_resource);
> +
> bool static_dev_dax(struct dev_dax *dev_dax)
> {
> return is_static(dev_dax->region);
> @@ -296,19 +373,44 @@ static ssize_t region_align_show(struct device *dev,
> static struct device_attribute dev_attr_region_align =
> __ATTR(align, 0400, region_align_show, NULL);
>
> +#define for_each_child_resource(extent, res) \
> + for (res = (extent)->child; res; res = res->sibling)
> +
> +resource_size_t
> +dax_avail_size(struct resource *dax_resource)
kdoc header
DJ
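
Rough sketch of what that could look like (again, descriptions are just my
reading of the code):

	/**
	 * dax_avail_size() - unused space remaining in an extent resource
	 * @dax_resource: the extent resource to interrogate
	 *
	 * Returns the resource size minus the size of all child allocations.
	 */
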
> +{
> + resource_size_t rc;
> + struct resource *used_res;
> +
> + rc = resource_size(dax_resource);
> + for_each_child_resource(dax_resource, used_res)
> + rc -= resource_size(used_res);
> + return rc;
> +}
> +EXPORT_SYMBOL_GPL(dax_avail_size);
> +
> #define for_each_dax_region_resource(dax_region, res) \
> for (res = (dax_region)->res.child; res; res = res->sibling)
>
> static unsigned long long dax_region_avail_size(struct dax_region *dax_region)
> {
> - resource_size_t size = resource_size(&dax_region->res);
> + resource_size_t size;
> struct resource *res;
>
> lockdep_assert_held(&dax_region_rwsem);
>
> - if (is_sparse(dax_region))
> - return 0;
> + if (is_sparse(dax_region)) {
> + /*
> + * Children of a sparse region represent available space not
> + * used space.
> + */
> + size = 0;
> + for_each_dax_region_resource(dax_region, res)
> + size += dax_avail_size(res);
> + return size;
> + }
>
> + size = resource_size(&dax_region->res);
> for_each_dax_region_resource(dax_region, res)
> size -= resource_size(res);
> return size;
> @@ -449,15 +551,26 @@ EXPORT_SYMBOL_GPL(kill_dev_dax);
> static void trim_dev_dax_range(struct dev_dax *dev_dax)
> {
> int i = dev_dax->nr_range - 1;
> - struct range *range = &dev_dax->ranges[i].range;
> + struct dev_dax_range *dev_range = &dev_dax->ranges[i];
> + struct range *range = &dev_range->range;
> struct dax_region *dax_region = dev_dax->region;
> + struct resource *res = &dax_region->res;
>
> lockdep_assert_held_write(&dax_region_rwsem);
> dev_dbg(&dev_dax->dev, "delete range[%d]: %#llx:%#llx\n", i,
> (unsigned long long)range->start,
> (unsigned long long)range->end);
>
> - __release_region(&dax_region->res, range->start, range_len(range));
> + if (dev_range->dax_resource) {
> + res = dev_range->dax_resource->res;
> + dev_dbg(&dev_dax->dev, "Trim sparse extent %pr\n", res);
> + }
> +
> + __release_region(res, range->start, range_len(range));
> +
> + if (dev_range->dax_resource)
> + dev_range->dax_resource->use_cnt--;
> +
> if (--dev_dax->nr_range == 0) {
> kfree(dev_dax->ranges);
> dev_dax->ranges = NULL;
> @@ -640,7 +753,7 @@ static void dax_region_unregister(void *region)
>
> struct dax_region *alloc_dax_region(struct device *parent, int region_id,
> struct range *range, int target_node, unsigned int align,
> - unsigned long flags)
> + unsigned long flags, struct dax_sparse_ops *sparse_ops)
> {
> struct dax_region *dax_region;
>
> @@ -658,12 +771,16 @@ struct dax_region *alloc_dax_region(struct device *parent, int region_id,
> || !IS_ALIGNED(range_len(range), align))
> return NULL;
>
> + if (!sparse_ops && (flags & IORESOURCE_DAX_SPARSE_CAP))
> + return NULL;
> +
> dax_region = kzalloc(sizeof(*dax_region), GFP_KERNEL);
> if (!dax_region)
> return NULL;
>
> dev_set_drvdata(parent, dax_region);
> kref_init(&dax_region->kref);
> + dax_region->sparse_ops = sparse_ops;
> dax_region->id = region_id;
> dax_region->align = align;
> dax_region->dev = parent;
> @@ -845,7 +962,8 @@ static int devm_register_dax_mapping(struct dev_dax *dev_dax, int range_id)
> }
>
> static int alloc_dev_dax_range(struct resource *parent, struct dev_dax *dev_dax,
> - u64 start, resource_size_t size)
> + u64 start, resource_size_t size,
> + struct dax_resource *dax_resource)
> {
> struct device *dev = &dev_dax->dev;
> struct dev_dax_range *ranges;
> @@ -884,6 +1002,7 @@ static int alloc_dev_dax_range(struct resource *parent, struct dev_dax *dev_dax,
> .start = alloc->start,
> .end = alloc->end,
> },
> + .dax_resource = dax_resource,
> };
>
> dev_dbg(dev, "alloc range[%d]: %pa:%pa\n", dev_dax->nr_range - 1,
> @@ -966,7 +1085,8 @@ static int dev_dax_shrink(struct dev_dax *dev_dax, resource_size_t size)
> int i;
>
> for (i = dev_dax->nr_range - 1; i >= 0; i--) {
> - struct range *range = &dev_dax->ranges[i].range;
> + struct dev_dax_range *dev_range = &dev_dax->ranges[i];
> + struct range *range = &dev_range->range;
> struct dax_mapping *mapping = dev_dax->ranges[i].mapping;
> struct resource *adjust = NULL, *res;
> resource_size_t shrink;
> @@ -982,12 +1102,21 @@ static int dev_dax_shrink(struct dev_dax *dev_dax, resource_size_t size)
> continue;
> }
>
> - for_each_dax_region_resource(dax_region, res)
> - if (strcmp(res->name, dev_name(dev)) == 0
> - && res->start == range->start) {
> - adjust = res;
> - break;
> - }
> + if (dev_range->dax_resource) {
> + for_each_child_resource(dev_range->dax_resource->res, res)
> + if (strcmp(res->name, dev_name(dev)) == 0
> + && res->start == range->start) {
> + adjust = res;
> + break;
> + }
> + } else {
> + for_each_dax_region_resource(dax_region, res)
> + if (strcmp(res->name, dev_name(dev)) == 0
> + && res->start == range->start) {
> + adjust = res;
> + break;
> + }
> + }
>
> if (dev_WARN_ONCE(dev, !adjust || i != dev_dax->nr_range - 1,
> "failed to find matching resource\n"))
> @@ -1025,19 +1154,21 @@ static bool adjust_ok(struct dev_dax *dev_dax, struct resource *res)
> }
>
> /**
> - * dev_dax_resize_static - Expand the device into the unused portion of the
> - * region. This may involve adjusting the end of an existing resource, or
> - * allocating a new resource.
> + * __dev_dax_resize - Expand the device into the unused portion of the region.
> + * This may involve adjusting the end of an existing resource, or allocating a
> + * new resource.
> *
> * @parent: parent resource to allocate this range in
> * @dev_dax: DAX device to be expanded
> * @to_alloc: amount of space to alloc; must be <= space available in @parent
> + * @dax_resource: if sparse; the parent resource
> *
> * Return the amount of space allocated or -ERRNO on failure
> */
> -static ssize_t dev_dax_resize_static(struct resource *parent,
> - struct dev_dax *dev_dax,
> - resource_size_t to_alloc)
> +static ssize_t __dev_dax_resize(struct resource *parent,
> + struct dev_dax *dev_dax,
> + resource_size_t to_alloc,
> + struct dax_resource *dax_resource)
> {
> struct resource *res, *first;
> int rc;
> @@ -1045,7 +1176,8 @@ static ssize_t dev_dax_resize_static(struct resource *parent,
> first = parent->child;
> if (!first) {
> rc = alloc_dev_dax_range(parent, dev_dax,
> - parent->start, to_alloc);
> + parent->start, to_alloc,
> + dax_resource);
> if (rc)
> return rc;
> return to_alloc;
> @@ -1059,7 +1191,8 @@ static ssize_t dev_dax_resize_static(struct resource *parent,
> if (res == first && res->start > parent->start) {
> alloc = min(res->start - parent->start, to_alloc);
> rc = alloc_dev_dax_range(parent, dev_dax,
> - parent->start, alloc);
> + parent->start, alloc,
> + dax_resource);
> if (rc)
> return rc;
> return alloc;
> @@ -1083,7 +1216,8 @@ static ssize_t dev_dax_resize_static(struct resource *parent,
> return rc;
> return alloc;
> }
> - rc = alloc_dev_dax_range(parent, dev_dax, res->end + 1, alloc);
> + rc = alloc_dev_dax_range(parent, dev_dax, res->end + 1, alloc,
> + dax_resource);
> if (rc)
> return rc;
> return alloc;
> @@ -1094,6 +1228,54 @@ static ssize_t dev_dax_resize_static(struct resource *parent,
> return 0;
> }
>
> +static ssize_t dev_dax_resize_static(struct dax_region *dax_region,
> + struct dev_dax *dev_dax,
> + resource_size_t to_alloc)
> +{
> + return __dev_dax_resize(&dax_region->res, dev_dax, to_alloc, NULL);
> +}
> +
> +static int find_free_extent(struct device *dev, void *data)
> +{
> + struct dax_region *dax_region = data;
> + struct dax_resource *dax_resource;
> +
> + if (!dax_region->sparse_ops->is_extent(dev))
> + return 0;
> +
> + dax_resource = dev_get_drvdata(dev);
> + if (!dax_resource || !dax_avail_size(dax_resource->res))
> + return 0;
> + return 1;
> +}
> +
> +static ssize_t dev_dax_resize_sparse(struct dax_region *dax_region,
> + struct dev_dax *dev_dax,
> + resource_size_t to_alloc)
> +{
> + struct dax_resource *dax_resource;
> + resource_size_t available_size;
> + struct device *extent_dev;
> + ssize_t alloc;
> +
> + extent_dev = device_find_child(dax_region->dev, dax_region,
> + find_free_extent);
> + if (!extent_dev)
> + return 0;
> +
> + dax_resource = dev_get_drvdata(extent_dev);
> + if (!dax_resource)
> + return 0;
> +
> + available_size = dax_avail_size(dax_resource->res);
> + to_alloc = min(available_size, to_alloc);
> + alloc = __dev_dax_resize(dax_resource->res, dev_dax, to_alloc, dax_resource);
> + if (alloc > 0)
> + dax_resource->use_cnt++;
> + put_device(extent_dev);
> + return alloc;
> +}
> +
> static ssize_t dev_dax_resize(struct dax_region *dax_region,
> struct dev_dax *dev_dax, resource_size_t size)
> {
> @@ -1117,7 +1299,10 @@ static ssize_t dev_dax_resize(struct dax_region *dax_region,
> return -ENXIO;
>
> retry:
> - alloc = dev_dax_resize_static(&dax_region->res, dev_dax, to_alloc);
> + if (is_sparse(dax_region))
> + alloc = dev_dax_resize_sparse(dax_region, dev_dax, to_alloc);
> + else
> + alloc = dev_dax_resize_static(dax_region, dev_dax, to_alloc);
> if (alloc <= 0)
> return alloc;
> to_alloc -= alloc;
> @@ -1226,7 +1411,7 @@ static ssize_t mapping_store(struct device *dev, struct device_attribute *attr,
> to_alloc = range_len(&r);
> if (alloc_is_aligned(dev_dax, to_alloc))
> rc = alloc_dev_dax_range(&dax_region->res, dev_dax, r.start,
> - to_alloc);
> + to_alloc, NULL);
> up_write(&dax_dev_rwsem);
> up_write(&dax_region_rwsem);
>
> @@ -1494,8 +1679,14 @@ static struct dev_dax *__devm_create_dev_dax(struct dev_dax_data *data)
> device_initialize(dev);
> dev_set_name(dev, "dax%d.%d", dax_region->id, dev_dax->id);
>
> + if (is_sparse(dax_region) && data->size) {
> + dev_err(parent, "Sparse DAX region devices are created initially with 0 size");
> + rc = -EINVAL;
> + goto err_id;
> + }
> +
> rc = alloc_dev_dax_range(&dax_region->res, dev_dax,
> dax_region->res.start,
> - data->size);
> + data->size, NULL);
> if (rc)
> goto err_range;
>
> diff --git a/drivers/dax/bus.h b/drivers/dax/bus.h
> index 783bfeef42cc..ae5029ea6047 100644
> --- a/drivers/dax/bus.h
> +++ b/drivers/dax/bus.h
> @@ -9,6 +9,7 @@ struct dev_dax;
> struct resource;
> struct dax_device;
> struct dax_region;
> +struct dax_sparse_ops;
>
> /* dax bus specific ioresource flags */
> #define IORESOURCE_DAX_STATIC BIT(0)
> @@ -17,7 +18,7 @@ struct dax_region;
>
> struct dax_region *alloc_dax_region(struct device *parent, int region_id,
> struct range *range, int target_node, unsigned int align,
> - unsigned long flags);
> + unsigned long flags, struct dax_sparse_ops *sparse_ops);
>
> struct dev_dax_data {
> struct dax_region *dax_region;
> diff --git a/drivers/dax/cxl.c b/drivers/dax/cxl.c
> index 367e86b1c22a..bf3b82b0120d 100644
> --- a/drivers/dax/cxl.c
> +++ b/drivers/dax/cxl.c
> @@ -5,6 +5,60 @@
>
> #include "../cxl/cxl.h"
> #include "bus.h"
> +#include "dax-private.h"
> +
> +static int __cxl_dax_add_resource(struct dax_region *dax_region,
> + struct region_extent *region_extent)
> +{
> + resource_size_t start, length;
> + struct device *dev;
> +
> + dev = &region_extent->dev;
> + start = dax_region->res.start + region_extent->hpa_range.start;
> + length = range_len(&region_extent->hpa_range);
> + return dax_region_add_resource(dax_region, dev, start, length);
> +}
> +
> +static int cxl_dax_add_resource(struct device *dev, void *data)
> +{
> + struct dax_region *dax_region = data;
> + struct region_extent *region_extent;
> +
> + region_extent = to_region_extent(dev);
> + if (!region_extent)
> + return 0;
> +
> + dev_dbg(dax_region->dev, "Adding resource HPA %par\n",
> + ®ion_extent->hpa_range);
> +
> + return __cxl_dax_add_resource(dax_region, region_extent);
> +}
> +
> +static int cxl_dax_region_notify(struct device *dev,
> + struct cxl_notify_data *notify_data)
> +{
> + struct cxl_dax_region *cxlr_dax = to_cxl_dax_region(dev);
> + struct dax_region *dax_region = dev_get_drvdata(dev);
> + struct region_extent *region_extent = notify_data->region_extent;
> +
> + switch (notify_data->event) {
> + case DCD_ADD_CAPACITY:
> + return __cxl_dax_add_resource(dax_region, region_extent);
> + case DCD_RELEASE_CAPACITY:
> + return dax_region_rm_resource(dax_region, &region_extent->dev);
> + case DCD_FORCED_CAPACITY_RELEASE:
> + default:
> + dev_err(&cxlr_dax->dev, "Unknown DC event %d\n",
> + notify_data->event);
> + break;
> + }
> +
> + return -ENXIO;
> +}
> +
> +struct dax_sparse_ops sparse_ops = {
> + .is_extent = is_region_extent,
> +};
>
> static int cxl_dax_region_probe(struct device *dev)
> {
> @@ -24,14 +78,16 @@ static int cxl_dax_region_probe(struct device *dev)
> flags |= IORESOURCE_DAX_SPARSE_CAP;
>
> dax_region = alloc_dax_region(dev, cxlr->id, &cxlr_dax->hpa_range, nid,
> - PMD_SIZE, flags);
> + PMD_SIZE, flags, &sparse_ops);
> if (!dax_region)
> return -ENOMEM;
>
> - if (cxlr->mode == CXL_REGION_DC)
> + if (cxlr->mode == CXL_REGION_DC) {
> + device_for_each_child(&cxlr_dax->dev, dax_region,
> + cxl_dax_add_resource);
> /* Add empty seed dax device */
> dev_size = 0;
> - else
> + } else
> dev_size = range_len(&cxlr_dax->hpa_range);
>
> data = (struct dev_dax_data) {
> @@ -47,6 +103,7 @@ static int cxl_dax_region_probe(struct device *dev)
> static struct cxl_driver cxl_dax_region_driver = {
> .name = "cxl_dax_region",
> .probe = cxl_dax_region_probe,
> + .notify = cxl_dax_region_notify,
> .id = CXL_DEVICE_DAX_REGION,
> .drv = {
> .suppress_bind_attrs = true,
> diff --git a/drivers/dax/dax-private.h b/drivers/dax/dax-private.h
> index ccde98c3d4e2..9e9f98c85620 100644
> --- a/drivers/dax/dax-private.h
> +++ b/drivers/dax/dax-private.h
> @@ -16,6 +16,36 @@ struct inode *dax_inode(struct dax_device *dax_dev);
> int dax_bus_init(void);
> void dax_bus_exit(void);
>
> +/**
> + * struct dax_resource - For sparse regions; an active resource
> + * @region: dax_region this resource is in
> + * @res: resource
> + * @use_cnt: count the number of uses of this resource
> + *
> + * Changes to the dax_region and the dax_resources within it are protected by
> + * dax_region_rwsem
> + */
> +struct dax_resource {
> + struct dax_region *region;
> + struct resource *res;
> + unsigned int use_cnt;
> +};
> +int dax_region_add_resource(struct dax_region *dax_region, struct device *dev,
> + resource_size_t start, resource_size_t length);
> +int dax_region_rm_resource(struct dax_region *dax_region,
> + struct device *dev);
> +resource_size_t dax_avail_size(struct resource *dax_resource);
> +
> +typedef int (*match_cb)(struct device *dev, resource_size_t *size_avail);
> +
> +/**
> + * struct dax_sparse_ops - Operations for sparse regions
> + * @is_extent: return if the device is an extent
> + */
> +struct dax_sparse_ops {
> + bool (*is_extent)(struct device *dev);
> +};
> +
> /**
> * struct dax_region - mapping infrastructure for dax devices
> * @id: kernel-wide unique region for a memory range
> @@ -27,6 +57,7 @@ void dax_bus_exit(void);
> * @res: resource tree to track instance allocations
> * @seed: allow userspace to find the first unbound seed device
> * @youngest: allow userspace to find the most recently created device
> + * @sparse_ops: operations required for sparse regions
> */
> struct dax_region {
> int id;
> @@ -38,6 +69,7 @@ struct dax_region {
> struct resource res;
> struct device *seed;
> struct device *youngest;
> + struct dax_sparse_ops *sparse_ops;
> };
>
> struct dax_mapping {
> @@ -62,6 +94,7 @@ struct dax_mapping {
> * @pgoff: page offset
> * @range: resource-span
> * @mapping: device to assist in interrogating the range layout
> + * @dax_resource: if not NULL; dax sparse resource containing this range
> */
> struct dev_dax {
> struct dax_region *region;
> @@ -79,6 +112,7 @@ struct dev_dax {
> unsigned long pgoff;
> struct range range;
> struct dax_mapping *mapping;
> + struct dax_resource *dax_resource;
> } *ranges;
> };
>
> diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c
> index 5e7c53f18491..0eea65052874 100644
> --- a/drivers/dax/hmem/hmem.c
> +++ b/drivers/dax/hmem/hmem.c
> @@ -28,7 +28,7 @@ static int dax_hmem_probe(struct platform_device *pdev)
>
> mri = dev->platform_data;
> dax_region = alloc_dax_region(dev, pdev->id, &mri->range,
> - mri->target_node, PMD_SIZE, flags);
> + mri->target_node, PMD_SIZE, flags, NULL);
> if (!dax_region)
> return -ENOMEM;
>
> diff --git a/drivers/dax/pmem.c b/drivers/dax/pmem.c
> index c8ebf4e281f2..f927e855f240 100644
> --- a/drivers/dax/pmem.c
> +++ b/drivers/dax/pmem.c
> @@ -54,7 +54,7 @@ static struct dev_dax *__dax_pmem_probe(struct device *dev)
> range.start += offset;
> dax_region = alloc_dax_region(dev, region_id, &range,
> nd_region->target_node, le32_to_cpu(pfn_sb->align),
> - IORESOURCE_DAX_STATIC);
> + IORESOURCE_DAX_STATIC, NULL);
> if (!dax_region)
> return ERR_PTR(-ENOMEM);
>
>