Jonathan Cameron wrote:
> On Thu, 23 Jun 2022 21:19:50 -0700
> Dan Williams <[email protected]> wrote:
>
> > The LIBNVDIMM subsystem is a platform agnostic representation of system
> > NVDIMM / persistent memory resources. To date, the CXL subsystem's
> > interaction with LIBNVDIMM has been to register an nvdimm-bridge device
> > and cxl_nvdimm objects to proxy CXL capabilities into existing LIBNVDIMM
> > subsystem mechanics.
> >
> > With regions the approach is the same. Create a new cxl_pmem_region
> > object to proxy CXL region details into a LIBNVDIMM definition. With
> > this enabling LIBNVDIMM can partition CXL persistent memory regions with
> > legacy namespace labels. A follow-on patch will add CXL region label and
> > CXL namespace label support to persist region configurations across
> > driver reload / system-reset events.
Ah, now I see why we share ID space with NVDIMMs. Fair enough, I should
have read to the end ;)
>
> >
> > Co-developed-by: Ben Widawsky <[email protected]>
> > Signed-off-by: Ben Widawsky <[email protected]>
> > Signed-off-by: Dan Williams <[email protected]>
>
> End of day, so a fairly superficial review on this and I'll hopefully
> take a second look at one or two of the earlier patches when time allows.
>
> Jonathan
>
> ...
>
> > +static struct cxl_pmem_region *cxl_pmem_region_alloc(struct cxl_region *cxlr)
> > +{
> > + struct cxl_pmem_region *cxlr_pmem = ERR_PTR(-ENXIO);
>
> Rarely used, so better to set it at the point where it's actually used.
Ok.
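i.e. something like this (untested sketch):

        struct cxl_pmem_region *cxlr_pmem;
        ...
        down_read(&cxl_region_rwsem);
        if (p->state != CXL_CONFIG_COMMIT) {
                cxlr_pmem = ERR_PTR(-ENXIO);
                goto out;
        }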
>
> > + struct cxl_region_params *p = &cxlr->params;
> > + struct device *dev;
> > + int i;
> > +
> > + down_read(&cxl_region_rwsem);
> > + if (p->state != CXL_CONFIG_COMMIT)
> > + goto out;
> > + cxlr_pmem = kzalloc(struct_size(cxlr_pmem, mapping, p->nr_targets),
> > + GFP_KERNEL);
> > + if (!cxlr_pmem) {
> > + cxlr_pmem = ERR_PTR(-ENOMEM);
> > + goto out;
> > + }
> > +
> > + cxlr_pmem->hpa_range.start = p->res->start;
> > + cxlr_pmem->hpa_range.end = p->res->end;
> > +
> > + /* Snapshot the region configuration underneath the cxl_region_rwsem */
> > + cxlr_pmem->nr_mappings = p->nr_targets;
> > + for (i = 0; i < p->nr_targets; i++) {
> > + struct cxl_endpoint_decoder *cxled = p->targets[i];
> > + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
> > + struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i];
> > +
> > + m->cxlmd = cxlmd;
> > + get_device(&cxlmd->dev);
> > + m->start = cxled->dpa_res->start;
> > + m->size = resource_size(cxled->dpa_res);
> > + m->position = i;
> > + }
> > +
> > + dev = &cxlr_pmem->dev;
> > + cxlr_pmem->cxlr = cxlr;
> > + device_initialize(dev);
> > + lockdep_set_class(&dev->mutex, &cxl_pmem_region_key);
> > + device_set_pm_not_required(dev);
> > + dev->parent = &cxlr->dev;
> > + dev->bus = &cxl_bus_type;
> > + dev->type = &cxl_pmem_region_type;
> > +out:
> > + up_read(&cxl_region_rwsem);
> > +
> > + return cxlr_pmem;
> > +}
> > +
> > +static void cxlr_pmem_unregister(void *dev)
> > +{
> > + device_unregister(dev);
> > +}
> > +
> > +/**
> > + * devm_cxl_add_pmem_region() - add a cxl_region to nd_region bridge
> > + * @host: same host as @cxlmd
>
> Run kernel-doc over these and clean all the warnings up.
> Parameter is cxlr, not host.
Fixed.
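i.e. something like (sketch, exact @cxlr wording still to be settled):

/**
 * devm_cxl_add_pmem_region() - add a cxl_region to nd_region bridge
 * @cxlr: parent CXL region for this pmem region bridge device
 *
 * Return: 0 on success, negative error code on failure.
 */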
>
>
> > + *
> > + * Return: 0 on success negative error code on failure.
> > + */
>
>
> > /*
> > * Unit test builds overrides this to __weak, find the 'strong' version
> > diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c
> > index b271f6e90b91..4ba7248275ac 100644
> > --- a/drivers/cxl/pmem.c
> > +++ b/drivers/cxl/pmem.c
> > @@ -7,6 +7,7 @@
>
> >
>
>
> > +static int match_cxl_nvdimm(struct device *dev, void *data)
> > +{
> > + return is_cxl_nvdimm(dev);
> > +}
> > +
> > +static void unregister_region(void *nd_region)
>
> Better to give this a more specific name as we have several
> unregister_region() functions in CXL now.
Ok, unregister_nvdimm_region() it is.
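i.e.:

-static void unregister_region(void *nd_region)
+static void unregister_nvdimm_region(void *nd_region)

...with the devm_add_action_or_reset() callsite updated to match.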
>
> > +{
> > + struct cxl_nvdimm_bridge *cxl_nvb;
> > + struct cxl_pmem_region *cxlr_pmem;
> > + int i;
> > +
> > + cxlr_pmem = nd_region_provider_data(nd_region);
> > + cxl_nvb = cxlr_pmem->bridge;
> > + device_lock(&cxl_nvb->dev);
> > + for (i = 0; i < cxlr_pmem->nr_mappings; i++) {
> > + struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i];
> > + struct cxl_nvdimm *cxl_nvd = m->cxl_nvd;
> > +
> > + if (cxl_nvd->region) {
> > + put_device(&cxlr_pmem->dev);
> > + cxl_nvd->region = NULL;
> > + }
> > + }
> > + device_unlock(&cxl_nvb->dev);
> > +
> > + nvdimm_region_delete(nd_region);
> > +}
> > +
>
> > +
> > +static int cxl_pmem_region_probe(struct device *dev)
> > +{
> > + struct nd_mapping_desc mappings[CXL_DECODER_MAX_INTERLEAVE];
> > + struct cxl_pmem_region *cxlr_pmem = to_cxl_pmem_region(dev);
> > + struct cxl_region *cxlr = cxlr_pmem->cxlr;
> > + struct cxl_pmem_region_info *info = NULL;
> > + struct cxl_nvdimm_bridge *cxl_nvb;
> > + struct nd_interleave_set *nd_set;
> > + struct nd_region_desc ndr_desc;
> > + struct cxl_nvdimm *cxl_nvd;
> > + struct nvdimm *nvdimm;
> > + struct resource *res;
> > + int rc = 0, i;
> > +
> > + cxl_nvb = cxl_find_nvdimm_bridge(&cxlr_pmem->mapping[0].cxlmd->dev);
> > + if (!cxl_nvb) {
> > + dev_dbg(dev, "bridge not found\n");
> > + return -ENXIO;
> > + }
> > + cxlr_pmem->bridge = cxl_nvb;
> > +
> > + device_lock(&cxl_nvb->dev);
> > + if (!cxl_nvb->nvdimm_bus) {
> > + dev_dbg(dev, "nvdimm bus not found\n");
> > + rc = -ENXIO;
> > + goto out;
> > + }
> > +
> > + memset(&mappings, 0, sizeof(mappings));
> > + memset(&ndr_desc, 0, sizeof(ndr_desc));
> > +
> > + res = devm_kzalloc(dev, sizeof(*res), GFP_KERNEL);
> > + if (!res) {
> > + rc = -ENOMEM;
> > + goto out;
> > + }
> > +
> > + res->name = "Persistent Memory";
> > + res->start = cxlr_pmem->hpa_range.start;
> > + res->end = cxlr_pmem->hpa_range.end;
> > + res->flags = IORESOURCE_MEM;
> > + res->desc = IORES_DESC_PERSISTENT_MEMORY;
> > +
> > + rc = insert_resource(&iomem_resource, res);
> > + if (rc)
> > + goto out;
> > +
> > + rc = devm_add_action_or_reset(dev, cxlr_pmem_remove_resource, res);
> > + if (rc)
> > + goto out;
> > +
> > + ndr_desc.res = res;
> > + ndr_desc.provider_data = cxlr_pmem;
> > +
> > + ndr_desc.numa_node = memory_add_physaddr_to_nid(res->start);
> > + ndr_desc.target_node = phys_to_target_node(res->start);
> > + if (ndr_desc.target_node == NUMA_NO_NODE) {
> > + ndr_desc.target_node = ndr_desc.numa_node;
> > + dev_dbg(&cxlr->dev, "changing target node from %d to %d",
> > + NUMA_NO_NODE, ndr_desc.target_node);
> > + }
> > +
> > + nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL);
> > + if (!nd_set) {
> > + rc = -ENOMEM;
> > + goto out;
> > + }
> > +
> > + ndr_desc.memregion = cxlr->id;
> > + set_bit(ND_REGION_CXL, &ndr_desc.flags);
> > + set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc.flags);
> > +
> > + info = kmalloc_array(cxlr_pmem->nr_mappings, sizeof(*info), GFP_KERNEL);
> > + if (!info)
> > + goto out;
> > +
> > + rc = -ENODEV;
>
> Personal taste, but I'd much rather see that set in the error handlers
> so I can quickly see where it applies.
Ok.
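i.e. drop the up-front "rc = -ENODEV;" and set it in each handler
(sketch):

                d = device_find_child(&cxlmd->dev, NULL, match_cxl_nvdimm);
                if (!d) {
                        dev_dbg(dev, "[%d]: %s: no cxl_nvdimm found\n", i,
                                dev_name(&cxlmd->dev));
                        rc = -ENODEV;
                        goto err;
                }

...and the same for the !nvdimm case.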
>
> > + for (i = 0; i < cxlr_pmem->nr_mappings; i++) {
> > + struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i];
> > + struct cxl_memdev *cxlmd = m->cxlmd;
> > + struct cxl_dev_state *cxlds = cxlmd->cxlds;
> > + struct device *d;
> > +
> > + d = device_find_child(&cxlmd->dev, NULL, match_cxl_nvdimm);
> > + if (!d) {
> > + dev_dbg(dev, "[%d]: %s: no cxl_nvdimm found\n", i,
> > + dev_name(&cxlmd->dev));
> > + goto err;
> > + }
> > +
> > + /* safe to drop ref now with bridge lock held */
> > + put_device(d);
> > +
> > + cxl_nvd = to_cxl_nvdimm(d);
> > + nvdimm = dev_get_drvdata(&cxl_nvd->dev);
> > + if (!nvdimm) {
> > + dev_dbg(dev, "[%d]: %s: no nvdimm found\n", i,
> > + dev_name(&cxlmd->dev));
> > + goto err;
> > + }
> > + cxl_nvd->region = cxlr_pmem;
> > + get_device(&cxlr_pmem->dev);
> > + m->cxl_nvd = cxl_nvd;
> > + mappings[i] = (struct nd_mapping_desc) {
> > + .nvdimm = nvdimm,
> > + .start = m->start,
> > + .size = m->size,
> > + .position = i,
> > + };
> > + info[i].offset = m->start;
> > + info[i].serial = cxlds->serial;
> > + }
> > + ndr_desc.num_mappings = cxlr_pmem->nr_mappings;
> > + ndr_desc.mapping = mappings;
> > +
> > + /*
> > + * TODO enable CXL labels which skip the need for 'interleave-set cookie'
> > + */
> > + nd_set->cookie1 =
> > + nd_fletcher64(info, sizeof(*info) * cxlr_pmem->nr_mappings, 0);
> > + nd_set->cookie2 = nd_set->cookie1;
> > + ndr_desc.nd_set = nd_set;
> > +
> > + cxlr_pmem->nd_region =
> > + nvdimm_pmem_region_create(cxl_nvb->nvdimm_bus, &ndr_desc);
> > + if (IS_ERR(cxlr_pmem->nd_region)) {
> > + rc = PTR_ERR(cxlr_pmem->nd_region);
> > + goto err;
> > + } else
>
> no need for else as other branch has gone flying off down to
> err.
Yup.
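i.e. (sketch, using the agreed unregister_nvdimm_region() name):

        if (IS_ERR(cxlr_pmem->nd_region)) {
                rc = PTR_ERR(cxlr_pmem->nd_region);
                goto err;
        }

        rc = devm_add_action_or_reset(dev, unregister_nvdimm_region,
                                      cxlr_pmem->nd_region);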
>
> > + rc = devm_add_action_or_reset(dev, unregister_region,
> > + cxlr_pmem->nd_region);
> > +out:
>
> Having labels out: and err: where both are used for errors is pretty
> confusing naming... Perhaps you are better off just not sharing the
> good exit path with any of the error paths.
>
Ok.
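Something like this for the tail of the function (untested sketch, final
label names may differ):

        rc = devm_add_action_or_reset(dev, unregister_nvdimm_region,
                                      cxlr_pmem->nd_region);
        if (rc)
                goto err;

        device_unlock(&cxl_nvb->dev);
        put_device(&cxl_nvb->dev);
        kfree(info);
        return 0;

err:
        /* error-only unwind, no longer shared with the success path */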
>
> > + device_unlock(&cxl_nvb->dev);
> > + put_device(&cxl_nvb->dev);
> > + kfree(info);
>
> Ok, so safe to do this here, but would be nice to do this
> in reverse order of setup with multiple labels so we can avoid
> paths that free things that were never created. Doesn't look
> like it would hurt much to move kfree(info) above the device_unlock()
> and only do that if we have allocated info.
Ok, but no need for more labels; unconditionally freeing @info and
unwinding the mapping references can proceed if @info is initialized to
NULL and @i is initialized to 0.
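i.e. (sketch):

        struct cxl_pmem_region_info *info = NULL;
        ...
        int rc = 0, i = 0;
        ...
err:
        /* @i == 0 when no mapping references were taken, loop is a nop */
        for (i--; i >= 0; i--) {
                struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i];

                m->cxl_nvd->region = NULL;
                put_device(&cxlr_pmem->dev);
        }
        device_unlock(&cxl_nvb->dev);
        put_device(&cxl_nvb->dev);
        kfree(info);    /* kfree(NULL) is a nop */
        return rc;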