On 8/16/2024 10:44 PM, ira.we...@intel.com wrote:
> From: Navneet Singh <navneet.si...@intel.com>
>
> Dynamic Capacity CXL regions must allow memory to be added or removed
> dynamically.  In addition to the quantity of memory available the
> location of the memory within a DC partition is dynamic based on the
> extents offered by a device.  CXL DAX regions must accommodate the
> sparseness of this memory in the management of DAX regions and devices.
>
> Introduce the concept of a sparse DAX region.  Add a create_dc_region()
> sysfs entry to create such regions.  Special case DC capable regions to
> create a 0 sized seed DAX device to maintain compatibility which
> requires a default DAX device to hold a region reference.
>
> Indicate 0 byte available capacity until such time that capacity is
> added.
>
> Sparse regions complicate the range mapping of dax devices.  There is no
> known use case for range mapping on sparse regions.  Avoid the
> complication by preventing range mapping of dax devices on sparse
> regions.
>
> Interleaving is deferred for now.  Add checks.
>
> Signed-off-by: Navneet Singh <navneet.si...@intel.com>
> Co-developed-by: Ira Weiny <ira.we...@intel.com>
> Signed-off-by: Ira Weiny <ira.we...@intel.com>
>
> ---
> Changes:
> [Fan: use single function for dc region store]
> [djiang: avoid setting dev_size twice]
> [djbw: Check DCD support and interleave restriction on region creation]
> [iweiny: squash patch : dax/region: Prevent range mapping allocation on 
> sparse regions]
> [iweiny: remove reviews]
> [iweiny: rebase to master]
> [iweiny: push sysfs version to 6.12]
> [iweiny: make cxled_to_mds inline]
> ---
>  Documentation/ABI/testing/sysfs-bus-cxl | 22 ++++++++--------
>  drivers/cxl/core/core.h                 | 12 +++++++++
>  drivers/cxl/core/port.c                 |  1 +
>  drivers/cxl/core/region.c               | 46 
> +++++++++++++++++++++++++++++++--
>  drivers/dax/bus.c                       | 10 +++++++
>  drivers/dax/bus.h                       |  1 +
>  drivers/dax/cxl.c                       | 16 ++++++++++--
>  7 files changed, 93 insertions(+), 15 deletions(-)
>
[...]
> @@ -2185,8 +2191,13 @@ static size_t store_targetN(struct cxl_region *cxlr, 
> const char *buf, int pos,
>                       goto out;
>               }
>  
> -             rc = attach_target(cxlr, to_cxl_endpoint_decoder(dev), pos,
> -                                TASK_INTERRUPTIBLE);
> +             cxled = to_cxl_endpoint_decoder(dev);
> +             if (cxlr->mode == CXL_REGION_DC &&
> +                 !cxl_dcd_supported(cxled_to_mds(cxled))) {
> +                     dev_dbg(dev, "DCD unsupported\n");
> +                     return -EINVAL;

Need a 'goto out' here (after setting rc = -EINVAL) so that put_device(dev) drops the device reference? Returning directly leaks it.


> +             }
> +             rc = attach_target(cxlr, cxled, pos, TASK_INTERRUPTIBLE);
>  out:
>               put_device(dev);
>       }
> @@ -2534,6 +2545,7 @@ static struct cxl_region *__create_region(struct 
> cxl_root_decoder *cxlrd,
>       switch (mode) {
>       case CXL_REGION_RAM:
>       case CXL_REGION_PMEM:
> +     case CXL_REGION_DC:
>               break;
>       default:
>               dev_err(&cxlrd->cxlsd.cxld.dev, "unsupported mode %s\n",
> @@ -2587,6 +2599,20 @@ static ssize_t create_ram_region_store(struct device 
> *dev,
>  }
>  DEVICE_ATTR_RW(create_ram_region);
>  
> +static ssize_t create_dc_region_show(struct device *dev,
> +                                  struct device_attribute *attr, char *buf)
> +{
> +     return __create_region_show(to_cxl_root_decoder(dev), buf);
> +}
> +
> +static ssize_t create_dc_region_store(struct device *dev,
> +                                   struct device_attribute *attr,
> +                                   const char *buf, size_t len)
> +{
> +     return create_region_store(dev, buf, len, CXL_REGION_DC);
> +}
> +DEVICE_ATTR_RW(create_dc_region);
> +
>  static ssize_t region_show(struct device *dev, struct device_attribute *attr,
>                          char *buf)
>  {
> @@ -3168,6 +3194,11 @@ static int devm_cxl_add_dax_region(struct cxl_region 
> *cxlr)
>       struct device *dev;
>       int rc;
>  
> +     if (cxlr->mode == CXL_REGION_DC && cxlr->params.interleave_ways != 1) {
> +             dev_err(&cxlr->dev, "Interleaving DC not supported\n");
> +             return -EINVAL;
> +     }
> +
>       cxlr_dax = cxl_dax_region_alloc(cxlr);
>       if (IS_ERR(cxlr_dax))
>               return PTR_ERR(cxlr_dax);
> @@ -3260,6 +3291,16 @@ static struct cxl_region *construct_region(struct 
> cxl_root_decoder *cxlrd,
>               return ERR_PTR(-EINVAL);
>  
>       mode = cxl_decoder_to_region_mode(cxled->mode);
> +     if (mode == CXL_REGION_DC) {
> +             if (!cxl_dcd_supported(cxled_to_mds(cxled))) {
> +                     dev_err(&cxled->cxld.dev, "DCD unsupported\n");
> +                     return ERR_PTR(-EINVAL);
> +             }
> +             if (cxled->cxld.interleave_ways != 1) {
> +                     dev_err(&cxled->cxld.dev, "Interleaving and DCD not 
> supported\n");
> +                     return ERR_PTR(-EINVAL);
> +             }
> +     }
>       do {
>               cxlr = __create_region(cxlrd, mode,
>                                      atomic_read(&cxlrd->region_id));
> @@ -3467,6 +3508,7 @@ static int cxl_region_probe(struct device *dev)
>       case CXL_REGION_PMEM:
>               return devm_cxl_add_pmem_region(cxlr);
>       case CXL_REGION_RAM:
> +     case CXL_REGION_DC:
>               /*
>                * The region can not be manged by CXL if any portion of
>                * it is already online as 'System RAM'
> diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
> index fde29e0ad68b..d8cb5195a227 100644
> --- a/drivers/dax/bus.c
> +++ b/drivers/dax/bus.c
> @@ -178,6 +178,11 @@ static bool is_static(struct dax_region *dax_region)
>       return (dax_region->res.flags & IORESOURCE_DAX_STATIC) != 0;
>  }
>  
> +static bool is_sparse(struct dax_region *dax_region)
> +{
> +     return (dax_region->res.flags & IORESOURCE_DAX_SPARSE_CAP) != 0;
> +}
> +
>  bool static_dev_dax(struct dev_dax *dev_dax)
>  {
>       return is_static(dev_dax->region);
> @@ -301,6 +306,9 @@ static unsigned long long dax_region_avail_size(struct 
> dax_region *dax_region)
>  
>       lockdep_assert_held(&dax_region_rwsem);
>  
> +     if (is_sparse(dax_region))
> +             return 0;
> +
>       for_each_dax_region_resource(dax_region, res)
>               size -= resource_size(res);
>       return size;
> @@ -1373,6 +1381,8 @@ static umode_t dev_dax_visible(struct kobject *kobj, 
> struct attribute *a, int n)
>               return 0;
>       if (a == &dev_attr_mapping.attr && is_static(dax_region))
>               return 0;
> +     if (a == &dev_attr_mapping.attr && is_sparse(dax_region))
> +             return 0;
>       if ((a == &dev_attr_align.attr ||
>            a == &dev_attr_size.attr) && is_static(dax_region))
>               return 0444;
> diff --git a/drivers/dax/bus.h b/drivers/dax/bus.h
> index cbbf64443098..783bfeef42cc 100644
> --- a/drivers/dax/bus.h
> +++ b/drivers/dax/bus.h
> @@ -13,6 +13,7 @@ struct dax_region;
>  /* dax bus specific ioresource flags */
>  #define IORESOURCE_DAX_STATIC BIT(0)
>  #define IORESOURCE_DAX_KMEM BIT(1)
> +#define IORESOURCE_DAX_SPARSE_CAP BIT(2)
>  
>  struct dax_region *alloc_dax_region(struct device *parent, int region_id,
>               struct range *range, int target_node, unsigned int align,
> diff --git a/drivers/dax/cxl.c b/drivers/dax/cxl.c
> index 9b29e732b39a..367e86b1c22a 100644
> --- a/drivers/dax/cxl.c
> +++ b/drivers/dax/cxl.c
> @@ -13,19 +13,31 @@ static int cxl_dax_region_probe(struct device *dev)
>       struct cxl_region *cxlr = cxlr_dax->cxlr;
>       struct dax_region *dax_region;
>       struct dev_dax_data data;
> +     resource_size_t dev_size;
> +     unsigned long flags;
>  
>       if (nid == NUMA_NO_NODE)
>               nid = memory_add_physaddr_to_nid(cxlr_dax->hpa_range.start);
>  
> +     flags = IORESOURCE_DAX_KMEM;
> +     if (cxlr->mode == CXL_REGION_DC)
> +             flags |= IORESOURCE_DAX_SPARSE_CAP;
> +
>       dax_region = alloc_dax_region(dev, cxlr->id, &cxlr_dax->hpa_range, nid,
> -                                   PMD_SIZE, IORESOURCE_DAX_KMEM);
> +                                   PMD_SIZE, flags);
>       if (!dax_region)
>               return -ENOMEM;
>  
> +     if (cxlr->mode == CXL_REGION_DC)
> +             /* Add empty seed dax device */
> +             dev_size = 0;
> +     else
> +             dev_size = range_len(&cxlr_dax->hpa_range);
> +
>       data = (struct dev_dax_data) {
>               .dax_region = dax_region,
>               .id = -1,
> -             .size = range_len(&cxlr_dax->hpa_range),
> +             .size = dev_size,
>               .memmap_on_memory = true,
>       };
>  
>


Reply via email to