From: Manish Honap <[email protected]>

Detect a vendor-specific CXL device at vfio-pci bind time and probe its
HDM decoder register block.
vfio_cxl_create_device_state() allocates per-device state via devm and reads MEM_CAPABLE and CACHE_CAPABLE from the CXL DVSEC. vfio_cxl_setup_regs() locates the component register block, temporarily maps it, calls cxl_probe_component_regs() to find the HDM block, then releases the mapping. vfio_pci_cxl_detect_and_init() chains these two steps. If either fails, vdev->cxl stays NULL and the device falls back to plain vfio-pci. Signed-off-by: Manish Honap <[email protected]> --- drivers/vfio/pci/cxl/vfio_cxl_core.c | 217 +++++++++++++++++++++++++++ drivers/vfio/pci/cxl/vfio_cxl_priv.h | 12 ++ 2 files changed, 229 insertions(+) diff --git a/drivers/vfio/pci/cxl/vfio_cxl_core.c b/drivers/vfio/pci/cxl/vfio_cxl_core.c index d12afec82ecd..b1c7603590b5 100644 --- a/drivers/vfio/pci/cxl/vfio_cxl_core.c +++ b/drivers/vfio/pci/cxl/vfio_cxl_core.c @@ -21,6 +21,158 @@ #include "../vfio_pci_priv.h" #include "vfio_cxl_priv.h" +/* + * vfio_cxl_create_device_state - Allocate and validate CXL device state + * + * Returns a pointer to the allocated vfio_pci_cxl_state on success, or + * ERR_PTR on failure. The allocation uses devm; the caller must call + * devm_kfree(&pdev->dev, cxl) on any subsequent setup failure to release + * the resource before device unbind. Using devm_kfree() to undo a devm + * allocation early is explicitly supported by the devres API. + * + * The caller assigns vdev->cxl only after all setup steps succeed, preventing + * partially-initialised state from being visible through vdev->cxl on any + * failure path. + */ +static struct vfio_pci_cxl_state * +vfio_cxl_create_device_state(struct pci_dev *pdev, u16 dvsec) +{ + struct vfio_pci_cxl_state *cxl; + u16 cap_word; + u32 hdr1; + + /* Freed automatically when pdev->dev is released. 
*/ + cxl = devm_cxl_dev_state_create(&pdev->dev, + CXL_DEVTYPE_DEVMEM, + pdev->dev.id, dvsec, + struct vfio_pci_cxl_state, + cxlds, false); + if (!cxl) + return ERR_PTR(-ENOMEM); + + pci_read_config_dword(pdev, dvsec + PCI_DVSEC_HEADER1, &hdr1); + cxl->dvsec_len = PCI_DVSEC_HEADER1_LEN(hdr1); + + pci_read_config_word(pdev, dvsec + CXL_DVSEC_CAPABILITY_OFFSET, + &cap_word); + + /* + * Only handle vendor devices (class != 0x0502) with Mem_Capable set. + * CACHE_CAPABLE is forwarded to the VMM so it knows whether a WBI + * sequence is needed before FLR. + */ + if (!FIELD_GET(CXL_DVSEC_MEM_CAPABLE, cap_word) || + (pdev->class >> 8) == PCI_CLASS_MEMORY_CXL) { + devm_kfree(&pdev->dev, cxl); + return ERR_PTR(-ENODEV); + } + + cxl->cache_capable = FIELD_GET(CXL_DVSEC_CACHE_CAPABLE, cap_word); + + return cxl; +} + +static int vfio_cxl_setup_regs(struct vfio_pci_core_device *vdev, + struct vfio_pci_cxl_state *cxl) +{ + struct cxl_register_map *map = &cxl->cxlds.reg_map; + resource_size_t offset, bar_offset, size; + struct pci_dev *pdev = vdev->pdev; + void __iomem *base; + int ret; + u8 count; + u8 bar; + + if (WARN_ON_ONCE(!pci_is_enabled(pdev))) + return -EINVAL; + + /* Find component register block via Register Locator DVSEC */ + ret = cxl_find_regblock(pdev, CXL_REGLOC_RBI_COMPONENT, map); + if (ret) + return ret; + + /* + * Request the region and map. This is a transient mapping + * used only to probe register capabilities; released immediately + * after cxl_probe_component_regs() returns. 
+ */ + if (!request_mem_region(map->resource, map->max_size, "vfio-cxl-probe")) + return -EBUSY; + + base = ioremap(map->resource, map->max_size); + if (!base) { + ret = -ENOMEM; + goto failed_release; + } + + /* Probe component register capabilities */ + cxl_probe_component_regs(&pdev->dev, base, &map->component_map); + + /* Check if HDM decoder was found */ + if (!map->component_map.hdm_decoder.valid) { + ret = -ENODEV; + goto failed_unmap; + } + + pci_dbg(pdev, "vfio_cxl: HDM decoder at offset=0x%lx, size=0x%lx\n", + map->component_map.hdm_decoder.offset, + map->component_map.hdm_decoder.size); + + /* Get HDM register info */ + ret = cxl_get_hdm_info(&cxl->cxlds, &count, &offset, &size); + if (ret) + goto failed_unmap; + + if (!count || !size) { + ret = -ENODEV; + goto failed_unmap; + } + + cxl->hdm_count = count; + /* + * cxl_get_hdm_info() returns rmap->offset = CXL_CM_OFFSET + <hdm_within_cm> + * (see cxl_probe_component_regs() which does base += CXL_CM_OFFSET before + * reading caps and stores CXL_CM_OFFSET + cap_ptr as the offset). + * Subtract CXL_CM_OFFSET so hdm_reg_offset is relative to the CXL.mem + * register area start, which is where comp_reg_virt[0] is anchored. + * The physical BAR address for hdm_iobase is recovered by adding + * CXL_CM_OFFSET back in vfio_cxl_setup_virt_regs(). + */ + cxl->hdm_reg_offset = offset - CXL_CM_OFFSET; + cxl->hdm_reg_size = size; + + ret = cxl_regblock_get_bar_info(map, &bar, &bar_offset); + if (ret) + goto failed_unmap; + + cxl->comp_reg_bar = bar; + cxl->comp_reg_offset = bar_offset; + cxl->comp_reg_size = CXL_COMPONENT_REG_BLOCK_SIZE; + + iounmap(base); + release_mem_region(map->resource, map->max_size); + + return 0; + +failed_unmap: + iounmap(base); +failed_release: + release_mem_region(map->resource, map->max_size); + + return ret; +} + +/* + * Free CXL state early on probe failure. 
devm_kfree() on a live devres
+ * allocation removes it from the list immediately, so the normal devres
+ * teardown at unbind time won't double-free it.
+ */
+static void vfio_cxl_dev_state_free(struct pci_dev *pdev,
+				    struct vfio_pci_cxl_state *cxl)
+{
+	devm_kfree(&pdev->dev, cxl);
+}
+
 /**
  * vfio_pci_cxl_detect_and_init - Detect and initialize a vendor-specific
  * CXL.mem device
@@ -32,10 +184,75 @@
  */
 void vfio_pci_cxl_detect_and_init(struct vfio_pci_core_device *vdev)
 {
+	struct pci_dev *pdev = vdev->pdev;
+	struct vfio_pci_cxl_state *cxl;
+	u16 dvsec;
+	int ret;
+
+	if (!pcie_is_cxl(pdev))
+		return;
+
+	dvsec = pci_find_dvsec_capability(pdev,
+					  PCI_VENDOR_ID_CXL,
+					  PCI_DVSEC_CXL_DEVICE);
+	if (!dvsec)
+		return;
+
+	/*
+	 * CXL DVSEC found: any failure from here is a hard probe error on
+	 * a confirmed CXL-capable device, not a silent non-CXL fallback.
+	 * Warn the operator so misconfiguration is visible.
+	 */
+	cxl = vfio_cxl_create_device_state(pdev, dvsec);
+	if (IS_ERR(cxl)) {
+		if (PTR_ERR(cxl) != -ENODEV)
+			pci_warn(pdev,
+				 "vfio-cxl: CXL device state allocation failed: %ld\n",
+				 PTR_ERR(cxl));
+		return;
+	}
+
+	/*
+	 * Required for ioremap of the component register block and
+	 * calls to cxl_probe_component_regs().
+	 */
+	ret = pci_enable_device_mem(pdev);
+	if (ret) {
+		pci_warn(pdev,
+			 "vfio-cxl: pci_enable_device_mem failed: %d\n", ret);
+		goto free_cxl;
+	}
+
+	ret = vfio_cxl_setup_regs(vdev, cxl);
+	if (ret) {
+		pci_warn(pdev,
+			 "vfio-cxl: HDM register probing failed: %d\n", ret);
+		pci_disable_device(pdev);
+		goto free_cxl;
+	}
+
+	pci_disable_device(pdev);
+
+	/*
+	 * Register probing succeeded. Assign vdev->cxl only now, so that
+	 * partially-initialised state is never visible through vdev->cxl
+	 * and all subsequent helpers can rely on it. There are no failure
+	 * paths after this point; the state is released later by
+	 * vfio_pci_cxl_cleanup() or by devres teardown at device unbind.
+	 */
+	vdev->cxl = cxl;
+
+	return;
+
+free_cxl:
+	vfio_cxl_dev_state_free(pdev, cxl);
 }
 
 void vfio_pci_cxl_cleanup(struct vfio_pci_core_device *vdev)
 {
+	struct vfio_pci_cxl_state *cxl = vdev->cxl;
+
+	if (!cxl)
+		return;
 }
 
 MODULE_IMPORT_NS("CXL");
diff --git a/drivers/vfio/pci/cxl/vfio_cxl_priv.h b/drivers/vfio/pci/cxl/vfio_cxl_priv.h
index 4cecc25db410..54b1f6d885aa 100644
--- a/drivers/vfio/pci/cxl/vfio_cxl_priv.h
+++ b/drivers/vfio/pci/cxl/vfio_cxl_priv.h
@@ -21,8 +21,20 @@ struct vfio_pci_cxl_state {
 	size_t hdm_reg_size;
 	resource_size_t comp_reg_offset;
 	size_t comp_reg_size;
+	u16 dvsec_len;
 	u8 hdm_count;
 	u8 comp_reg_bar;
+	bool cache_capable;
 };
 
+/*
+ * CXL DVSEC for CXL Devices - register offsets within the DVSEC
+ * (CXL 4.0 8.1.3).
+ * Offsets are relative to the DVSEC capability base (cxl->dvsec).
+ */
+#define CXL_DVSEC_CAPABILITY_OFFSET	0xa
+#define CXL_DVSEC_MEM_CAPABLE		BIT(2)
+/* CXL DVSEC Capability register bit 0: device supports CXL.cache */
+#define CXL_DVSEC_CACHE_CAPABLE		BIT(0)
+
 #endif /* __LINUX_VFIO_CXL_PRIV_H */
-- 
2.25.1

