This implements DDW (dynamic DMA windows) for the emulated and VFIO PHBs. On reset, all DMA windows are removed and the default window is created again; the same is done on the "ibm,reset-pe-dma-window" call. This replaces sPAPRPHBClass::finish_realize with the sPAPRPHBClass::ddw_query, ddw_create, ddw_remove and ddw_reset callbacks.
The "ddw" property is enabled by default on a PHB but for compatibility pseries-2.1 machine disables it. Signed-off-by: Alexey Kardashevskiy <a...@ozlabs.ru> --- Changes: v4: * reset handler is back in generalized form v3: * removed reset * windows_num is now 1 or bigger rather than 0-based value and it is only changed in PHB code, not in RTAS * added page mask check in create() * added SPAPR_PCI_DDW_MAX_WINDOWS to track how many windows are already created v2: * tested on hacked emulated E1000 * implemented DDW reset on the PHB reset * spapr_pci_ddw_remove/spapr_pci_ddw_reset are public for reuse by VFIO spapr_pci_vfio: Enable DDW This implements DDW for VFIO. Host kernel support is required for this. After this patch DDW will be enabled on all machines but pseries-2.1. Signed-off-by: Alexey Kardashevskiy <a...@ozlabs.ru> --- Changes: v2: * remove()/reset() callbacks use spapr_pci's ones --- hw/ppc/spapr_pci.c | 160 +++++++++++++++++++++++++++++++++++--------- hw/ppc/spapr_pci_vfio.c | 98 +++++++++++++++++---------- include/hw/pci-host/spapr.h | 15 ++++- 3 files changed, 203 insertions(+), 70 deletions(-) diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index 6bd00e8..3ec03be 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -469,6 +469,126 @@ static const MemoryRegionOps spapr_msi_ops = { .endianness = DEVICE_LITTLE_ENDIAN }; +static int spapr_phb_get_win_num_cb(Object *child, void *opaque) +{ + if (object_dynamic_cast(child, TYPE_SPAPR_TCE_TABLE)) { + ++*(unsigned *)opaque; + } + return 0; +} + +unsigned spapr_phb_get_win_num(sPAPRPHBState *sphb) +{ + unsigned ret = 0; + + object_child_foreach(OBJECT(sphb), spapr_phb_get_win_num_cb, &ret); + + return ret; +} + +/* + * Dynamic DMA windows + */ +static int spapr_pci_ddw_query(sPAPRPHBState *sphb, + uint32_t *windows_supported, + uint32_t *page_size_mask, + uint32_t *dma32_window_size, + uint64_t *dma64_window_size) +{ + *windows_supported = SPAPR_PCI_DDW_MAX_WINDOWS; + *page_size_mask = 
DDW_PGSIZE_64K | DDW_PGSIZE_16M; + *dma32_window_size = SPAPR_PCI_TCE32_WIN_SIZE; + *dma64_window_size = ram_size; + + return 0; +} + +static int spapr_pci_ddw_create(sPAPRPHBState *sphb, uint32_t liobn, + uint32_t page_shift, uint32_t window_shift, + sPAPRTCETable **ptcet) +{ + uint64_t bus_offset = spapr_phb_get_win_num(sphb) ? + SPAPR_PCI_TCE64_START : 0; + + if (((page_shift != 16) && (page_shift != 24) && (page_shift != 12))) { + return -1; + } + + *ptcet = spapr_tce_new_table(DEVICE(sphb), liobn, + bus_offset, + page_shift, + 1ULL << (window_shift - page_shift), + false); + if (!*ptcet) { + return -1; + } + memory_region_add_subregion(&sphb->iommu_root, (*ptcet)->bus_offset, + spapr_tce_get_iommu(*ptcet)); + + return 0; +} + +int spapr_pci_ddw_remove(sPAPRPHBState *sphb, sPAPRTCETable *tcet) +{ + memory_region_del_subregion(&sphb->iommu_root, + spapr_tce_get_iommu(tcet)); + spapr_tce_free_table(tcet); + + return 0; +} + +static int spapr_pci_remove_ddw_cb(Object *child, void *opaque) +{ + sPAPRTCETable *tcet; + + tcet = (sPAPRTCETable *) object_dynamic_cast(child, TYPE_SPAPR_TCE_TABLE); + + if (tcet) { + sPAPRPHBState *sphb = opaque; + sPAPRPHBClass *spc = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb); + + spc->ddw_remove(sphb, tcet); + } + + return 0; +} + +int spapr_pci_ddw_reset(sPAPRPHBState *sphb) +{ + int ret; + sPAPRPHBClass *spc; + sPAPRTCETable *tcet; + uint32_t windows_supported = 0, page_size_mask = 0, dma32_window_size = 0; + uint64_t dma64_window_size = 0; + + /* Remove all windows */ + object_child_foreach(OBJECT(sphb), spapr_pci_remove_ddw_cb, sphb); + + /* Create default 32bit window */ + spc = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb); + if (!spc->ddw_create || !spc->ddw_query) { + return -1; + } + + ret = spc->ddw_query(sphb, &windows_supported, &page_size_mask, + &dma32_window_size, &dma64_window_size); + if (ret) { + return ret; + } + + sphb->ddw_enabled = (windows_supported > 1); + + ret = spc->ddw_create(sphb, SPAPR_PCI_LIOBN(sphb->index, 0), + 
SPAPR_TCE_PAGE_SHIFT, ctzl(dma32_window_size), &tcet); + if (ret) { + return ret; + } + + object_unref(OBJECT(tcet)); + + return 0; +} + /* * PHB PCI device */ @@ -484,7 +604,6 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) SysBusDevice *s = SYS_BUS_DEVICE(dev); sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(s); PCIHostState *phb = PCI_HOST_BRIDGE(s); - sPAPRPHBClass *info = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(s); char *namebuf; int i; PCIBus *bus; @@ -622,37 +741,9 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) sphb->lsi_table[i].irq = irq; } - if (!info->finish_realize) { - error_setg(errp, "finish_realize not defined"); - return; - } - - info->finish_realize(sphb, errp); - sphb->msi = g_hash_table_new_full(g_int_hash, g_int_equal, g_free, g_free); } -static void spapr_phb_finish_realize(sPAPRPHBState *sphb, Error **errp) -{ - sPAPRTCETable *tcet; - - tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn, - 0, - SPAPR_TCE_PAGE_SHIFT, - 0x40000000 >> SPAPR_TCE_PAGE_SHIFT, false); - if (!tcet) { - error_setg(errp, "Unable to create TCE table for %s", - sphb->dtbusname); - return ; - } - - /* Register default 32bit DMA window */ - memory_region_add_subregion(&sphb->iommu_root, 0, - spapr_tce_get_iommu(tcet)); - - object_unref(OBJECT(tcet)); -} - static int spapr_phb_children_reset(Object *child, void *opaque) { DeviceState *dev = (DeviceState *) object_dynamic_cast(child, TYPE_DEVICE); @@ -666,7 +757,11 @@ static int spapr_phb_children_reset(Object *child, void *opaque) static void spapr_phb_reset(DeviceState *qdev) { - /* Reset the IOMMU state */ + sPAPRPHBClass *spc = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(qdev); + + if (spc->ddw_reset) { + spc->ddw_reset(SPAPR_PCI_HOST_BRIDGE(qdev)); + } object_child_foreach(OBJECT(qdev), spapr_phb_children_reset, NULL); } @@ -801,7 +896,10 @@ static void spapr_phb_class_init(ObjectClass *klass, void *data) dc->vmsd = &vmstate_spapr_pci; set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); 
dc->cannot_instantiate_with_device_add_yet = false; - spc->finish_realize = spapr_phb_finish_realize; + spc->ddw_query = spapr_pci_ddw_query; + spc->ddw_create = spapr_pci_ddw_create; + spc->ddw_remove = spapr_pci_ddw_remove; + spc->ddw_reset = spapr_pci_ddw_reset; } static const TypeInfo spapr_phb_info = { diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c index aabf0ae..b20ac90 100644 --- a/hw/ppc/spapr_pci_vfio.c +++ b/hw/ppc/spapr_pci_vfio.c @@ -27,65 +27,89 @@ static Property spapr_phb_vfio_properties[] = { DEFINE_PROP_END_OF_LIST(), }; -static void spapr_phb_vfio_finish_realize(sPAPRPHBState *sphb, Error **errp) +static int spapr_pci_vfio_ddw_query(sPAPRPHBState *sphb, + uint32_t *windows_supported, + uint32_t *page_size_mask, + uint32_t *dma32_window_size, + uint64_t *dma64_window_size) { sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(sphb); struct vfio_iommu_spapr_tce_info info = { .argsz = sizeof(info) }; int ret; - sPAPRTCETable *tcet; - uint32_t liobn = svphb->phb.dma_liobn; - if (svphb->iommugroupid == -1) { - error_setg(errp, "Wrong IOMMU group ID %d", svphb->iommugroupid); - return; - } - - ret = vfio_container_ioctl(&svphb->phb.iommu_as, svphb->iommugroupid, - VFIO_CHECK_EXTENSION, - (void *) VFIO_SPAPR_TCE_IOMMU); - if (ret != 1) { - error_setg_errno(errp, -ret, - "spapr-vfio: SPAPR extension is not supported"); - return; - } - - ret = vfio_container_ioctl(&svphb->phb.iommu_as, svphb->iommugroupid, + ret = vfio_container_ioctl(&sphb->iommu_as, svphb->iommugroupid, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info); if (ret) { - error_setg_errno(errp, -ret, - "spapr-vfio: get info from container failed"); - return; + return ret; } - tcet = spapr_tce_new_table(DEVICE(sphb), liobn, info.dma32_window_start, - SPAPR_TCE_PAGE_SHIFT, - info.dma32_window_size >> SPAPR_TCE_PAGE_SHIFT, - true); - if (!tcet) { - error_setg(errp, "spapr-vfio: failed to create VFIO TCE table"); - return; + *windows_supported = info.windows_supported; + *page_size_mask = 
info.flags & DDW_PGSIZE_MASK; + *dma32_window_size = info.dma32_window_size; + *dma64_window_size = ram_size; + + return ret; +} + +static int spapr_pci_vfio_ddw_create(sPAPRPHBState *sphb, uint32_t liobn, + uint32_t page_shift, uint32_t window_shift, + sPAPRTCETable **ptcet) +{ + sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(sphb); + struct vfio_iommu_spapr_tce_create create = { + .argsz = sizeof(create), + .page_shift = page_shift, + .window_shift = window_shift, + .levels = 1, + .start_addr = 0, + }; + int ret; + + ret = vfio_container_ioctl(&sphb->iommu_as, svphb->iommugroupid, + VFIO_IOMMU_SPAPR_TCE_CREATE, &create); + if (ret) { + return ret; } - /* Register default 32bit DMA window */ - memory_region_add_subregion(&sphb->iommu_root, tcet->bus_offset, - spapr_tce_get_iommu(tcet)); + *ptcet = spapr_tce_new_table(DEVICE(sphb), liobn, + create.start_addr, + page_shift, + 1ULL << (window_shift - page_shift), + true); + if (!*ptcet) { + return -1; + } + memory_region_add_subregion(&sphb->iommu_root, (*ptcet)->bus_offset, + spapr_tce_get_iommu(*ptcet)); - object_unref(OBJECT(tcet)); + return ret; } -static void spapr_phb_vfio_reset(DeviceState *qdev) +static int spapr_pci_vfio_ddw_remove(sPAPRPHBState *sphb, sPAPRTCETable *tcet) { - /* Do nothing */ + sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(sphb); + struct vfio_iommu_spapr_tce_remove remove = { + .argsz = sizeof(remove), + .start_addr = tcet->bus_offset + }; + int ret; + + spapr_pci_ddw_remove(sphb, tcet); + ret = vfio_container_ioctl(&sphb->iommu_as, svphb->iommugroupid, + VFIO_IOMMU_SPAPR_TCE_REMOVE, &remove); + + return ret; } static void spapr_phb_vfio_class_init(ObjectClass *klass, void *data) { - DeviceClass *dc = DEVICE_CLASS(klass); sPAPRPHBClass *spc = SPAPR_PCI_HOST_BRIDGE_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); dc->props = spapr_phb_vfio_properties; - dc->reset = spapr_phb_vfio_reset; - spc->finish_realize = spapr_phb_vfio_finish_realize; + spc->ddw_query = 
spapr_pci_vfio_ddw_query; + spc->ddw_create = spapr_pci_vfio_ddw_create; + spc->ddw_remove = spapr_pci_vfio_ddw_remove; } static const TypeInfo spapr_phb_vfio_info = { diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h index eec95f3..577f908 100644 --- a/include/hw/pci-host/spapr.h +++ b/include/hw/pci-host/spapr.h @@ -48,8 +48,6 @@ typedef struct sPAPRPHBVFIOState sPAPRPHBVFIOState; struct sPAPRPHBClass { PCIHostBridgeClass parent_class; - void (*finish_realize)(sPAPRPHBState *sphb, Error **errp); - /* sPAPR spec defined pagesize mask values */ #define DDW_PGSIZE_4K 0x01 #define DDW_PGSIZE_64K 0x02 @@ -106,6 +104,8 @@ struct sPAPRPHBState { int32_t msi_devs_num; spapr_pci_msi_mig *msi_devs; + bool ddw_enabled; + QLIST_ENTRY(sPAPRPHBState) list; }; @@ -129,6 +129,14 @@ struct sPAPRPHBVFIOState { #define SPAPR_PCI_MSI_WINDOW 0x40000000000ULL +#define SPAPR_PCI_TCE32_WIN_SIZE 0x80000000ULL + +/* Default 64bit dynamic window offset */ +#define SPAPR_PCI_TCE64_START 0x8000000000000000ULL + +/* Maximum allowed number of DMA windows for emulated PHB */ +#define SPAPR_PCI_DDW_MAX_WINDOWS 2 + static inline qemu_irq spapr_phb_lsi_qirq(struct sPAPRPHBState *phb, int pin) { return xics_get_qirq(spapr->icp, phb->lsi_table[pin].irq); @@ -147,5 +155,8 @@ void spapr_pci_rtas_init(void); sPAPRPHBState *spapr_pci_find_phb(sPAPREnvironment *spapr, uint64_t buid); PCIDevice *spapr_pci_find_dev(sPAPREnvironment *spapr, uint64_t buid, uint32_t config_addr); +int spapr_pci_ddw_remove(sPAPRPHBState *sphb, sPAPRTCETable *tcet); +int spapr_pci_ddw_reset(sPAPRPHBState *sphb); +unsigned spapr_phb_get_win_num(sPAPRPHBState *sphb); #endif /* __HW_SPAPR_PCI_H__ */ -- 2.0.0