On 5/20/2026 2:54 AM, Narayana Murty N wrote: > Split spapr_pci_vfio.c into two files to separate concerns: > - spapr_pci_vfio.c: Contains general VFIO routines > - spapr_pci_vfio_eeh.c: Contains EEH-specific routines > > Additionally, consolidate VFIO EEH function declarations into a new > header file (spapr_vfio.h) to improve modularity and reduce header > dependencies. > > Changes: > - Split VFIO functionality: keep general VFIO routines in > spapr_pci_vfio.c and move EEH routines to spapr_pci_vfio_eeh.c > - Created include/hw/ppc/spapr_vfio.h with forward declarations > to avoid pulling in full spapr headers and libfdt dependencies > - Introduced stubs/spapr_pci_vfio-stubs.c to consolidate all VFIO, > VFIO EEH stub functions in one place > - Updated hw/ppc/spapr_pci.c to include new spapr_vfio.h header > - Updated stubs/meson.build to reference new stub file > > This improves code organization by separating VFIO and EEH concerns, > and enhances build system modularity by making it easier to maintain > VFIO-related code separately from core sPAPR PCI code. > > Signed-off-by: Narayana Murty N <[email protected]> > --- > hw/ppc/Kconfig | 2 +- > hw/ppc/meson.build | 1 + > hw/ppc/spapr_pci.c | 1 + > hw/ppc/spapr_pci_vfio.c | 367 +---------------------------------- > hw/ppc/spapr_pci_vfio_eeh.c | 346 +++++++++++++++++++++++++++++++++ > include/hw/pci-host/spapr.h | 44 +---- > include/hw/ppc/spapr_vfio.h | 28 +++ > stubs/meson.build | 1 + > stubs/spapr_phb_vfio-stubs.c | 52 +++++ > 9 files changed, 432 insertions(+), 410 deletions(-) > create mode 100644 hw/ppc/spapr_pci_vfio_eeh.c > create mode 100644 include/hw/ppc/spapr_vfio.h > create mode 100644 stubs/spapr_phb_vfio-stubs.c > > diff --git a/hw/ppc/Kconfig b/hw/ppc/Kconfig > index 347dcce690..1fb191fe83 100644 > --- a/hw/ppc/Kconfig > +++ b/hw/ppc/Kconfig > @@ -6,7 +6,7 @@ config PSERIES > imply PCI_DEVICES > imply TEST_DEVICES > imply VIRTIO_VGA > - imply VFIO_PCI if LINUX # needed by spapr_pci_vfio.c > + imply VFIO_PCI if LINUX # needed by spapr_pci_vfio.c and > spapr_pci_vfio_eeh.c > select NVDIMM > select DIMM > select PCI > diff --git a/hw/ppc/meson.build b/hw/ppc/meson.build > index 37aa535db2..97e4be0dc9 100644 > --- a/hw/ppc/meson.build > +++ b/hw/ppc/meson.build > @@ -36,6 +36,7 @@ ppc_ss.add(when: 'CONFIG_SPAPR_RNG', if_true: > files('spapr_rng.c')) > if host_os == 'linux' > ppc_ss.add(when: 'CONFIG_PSERIES', if_true: files( > 'spapr_pci_vfio.c', > + 'spapr_pci_vfio_eeh.c', > )) > endif > > diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c > index f08f21f03c..576b92229b 100644 > --- a/hw/ppc/spapr_pci.c > +++ b/hw/ppc/spapr_pci.c > @@ -33,6 +33,7 @@ > #include "hw/pci/msix.h" > #include "hw/pci/pci_host.h" > #include "hw/ppc/spapr.h" > +#include "hw/ppc/spapr_vfio.h" > #include "hw/pci-host/spapr.h" > #include <libfdt.h> > #include "trace.h" > diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c > index ed0b22a84a..2207654d83 100644 > --- a/hw/ppc/spapr_pci_vfio.c > +++ b/hw/ppc/spapr_pci_vfio.c > @@ -22,119 +22,11 @@ > #include <linux/vfio.h> > #include "hw/ppc/spapr.h" > #include "hw/pci-host/spapr.h" > +#include "hw/ppc/spapr_vfio.h" > #include "hw/pci/msix.h" > #include "hw/pci/pci_device.h" > #include "hw/vfio/vfio-container-legacy.h" > #include "qemu/error-report.h" > -#include CONFIG_DEVICES /* CONFIG_VFIO_PCI */ > - > -/* > - * Interfaces for IBM EEH (Enhanced Error Handling) > - */ > -#ifdef CONFIG_VFIO_PCI > -static bool vfio_eeh_container_ok(VFIOLegacyContainer *container) > -{ > - /* > - * As of 2016-03-04 (linux-4.5) the host kernel EEH/VFIO > - * implementation is broken if there are multiple groups in a > - * container. The hardware works in units of Partitionable > - * Endpoints (== IOMMU groups) and the EEH operations naively > - * iterate across all groups in the container, without any logic > - * to make sure the groups have their state synchronized. For > - * certain operations (ENABLE) that might be ok, until an error > - * occurs, but for others (GET_STATE) it's clearly broken. > - */ > - > - /* > - * XXX Once fixed kernels exist, test for them here > - */ > - > - if (QLIST_EMPTY(&container->group_list)) { > - return false; > - } > - > - if (QLIST_NEXT(QLIST_FIRST(&container->group_list), container_next)) { > - return false; > - } > - > - return true; > -} > - > -static int vfio_eeh_container_op(VFIOLegacyContainer *container, uint32_t op) > -{ > - struct vfio_eeh_pe_op pe_op = { > - .argsz = sizeof(pe_op), > - .op = op, > - }; > - int ret; > - > - if (!vfio_eeh_container_ok(container)) { > - error_report("vfio/eeh: EEH_PE_OP 0x%x: " > - "kernel requires a container with exactly one group", > op); > - return -EPERM; > - } > - > - ret = ioctl(container->fd, VFIO_EEH_PE_OP, &pe_op); > - if (ret < 0) { > - error_report("vfio/eeh: EEH_PE_OP 0x%x failed: %m", op); > - return -errno; > - } > - > - return ret; > -} > - > -static VFIOLegacyContainer *vfio_eeh_as_container(AddressSpace *as) > -{ > - VFIOAddressSpace *space = vfio_address_space_get(as); > - VFIOContainer *bcontainer = NULL; > - > - if (QLIST_EMPTY(&space->containers)) { > - /* No containers to act on */ > - goto out; > - } > - > - bcontainer = QLIST_FIRST(&space->containers); > - > - if (QLIST_NEXT(bcontainer, next)) { > - /* > - * We don't yet have logic to synchronize EEH state across > - * multiple containers > - */ > - bcontainer = NULL; > - goto out; > - } > - > -out: > - vfio_address_space_put(space); > - return VFIO_IOMMU_LEGACY(bcontainer); > -} > - > -static bool vfio_eeh_as_ok(AddressSpace *as) > -{ > - VFIOLegacyContainer *container = vfio_eeh_as_container(as); > - > - return (container != NULL) && vfio_eeh_container_ok(container); > -} > - > -static int vfio_eeh_as_op(AddressSpace *as, uint32_t op) > -{ > - VFIOLegacyContainer *container = vfio_eeh_as_container(as); > - > - if (!container) { > - return -ENODEV; > - } > - return vfio_eeh_container_op(container, op); > -} > - > -bool spapr_phb_eeh_available(SpaprPhbState *sphb) > -{ > - return vfio_eeh_as_ok(&sphb->iommu_as); > -} > - > -static void spapr_phb_vfio_eeh_reenable(SpaprPhbState *sphb) > -{ > - vfio_eeh_as_op(&sphb->iommu_as, VFIO_EEH_PE_ENABLE); > -} > > void spapr_phb_vfio_reset(DeviceState *qdev) > { > @@ -146,260 +38,3 @@ void spapr_phb_vfio_reset(DeviceState *qdev) > */ > spapr_phb_vfio_eeh_reenable(SPAPR_PCI_HOST_BRIDGE(qdev)); > } > - > -static void spapr_eeh_pci_find_device(PCIBus *bus, PCIDevice *pdev, > - void *opaque) > -{ > - bool *found = opaque; > - > - if (object_dynamic_cast(OBJECT(pdev), "vfio-pci")) { > - *found = true; > - } > -} > - > -int spapr_phb_vfio_eeh_set_option(SpaprPhbState *sphb, > - unsigned int addr, int option) > -{ > - uint32_t op; > - int ret; > - > - switch (option) { > - case RTAS_EEH_DISABLE: > - op = VFIO_EEH_PE_DISABLE; > - break; > - case RTAS_EEH_ENABLE: { > - PCIHostState *phb; > - bool found = false; > - > - /* > - * The EEH functionality is enabled per sphb level instead of > - * per PCI device. We have already identified this specific sphb > - * based on buid passed as argument to ibm,set-eeh-option rtas > - * call. Now we just need to check the validity of the PCI > - * pass-through devices (vfio-pci) under this sphb bus. > - * We have already validated that all the devices under this sphb > - * are from same iommu group (within same PE) before coming here. > - * > - * Prior to linux commit 98ba956f6a389 ("powerpc/pseries/eeh: > - * Rework device EEH PE determination") kernel would call > - * eeh-set-option for each device in the PE using the device's > - * config_address as the argument rather than the PE address. > - * Hence if we check validity of supplied config_addr whether > - * it matches to this PHB will cause issues with older kernel > - * versions v5.9 and older. If we return an error from > - * eeh-set-option when the argument isn't a valid PE address > - * then older kernels (v5.9 and older) will interpret that as > - * EEH not being supported. > - */ > - phb = PCI_HOST_BRIDGE(sphb); > - pci_for_each_device(phb->bus, (addr >> 16) & 0xFF, > - spapr_eeh_pci_find_device, &found); > - > - if (!found) { > - return RTAS_OUT_PARAM_ERROR; > - } > - > - op = VFIO_EEH_PE_ENABLE; > - break; > - } > - case RTAS_EEH_THAW_IO: > - op = VFIO_EEH_PE_UNFREEZE_IO; > - break; > - case RTAS_EEH_THAW_DMA: > - op = VFIO_EEH_PE_UNFREEZE_DMA; > - break; > - default: > - return RTAS_OUT_PARAM_ERROR; > - } > - > - ret = vfio_eeh_as_op(&sphb->iommu_as, op); > - if (ret < 0) { > - return RTAS_OUT_HW_ERROR; > - } > - > - return RTAS_OUT_SUCCESS; > -} > - > -int spapr_phb_vfio_eeh_get_state(SpaprPhbState *sphb, int *state) > -{ > - int ret; > - > - ret = vfio_eeh_as_op(&sphb->iommu_as, VFIO_EEH_PE_GET_STATE); > - if (ret < 0) { > - return RTAS_OUT_PARAM_ERROR; > - } > - > - *state = ret; > - return RTAS_OUT_SUCCESS; > -} > - > -static void spapr_phb_vfio_eeh_clear_dev_msix(PCIBus *bus, > - PCIDevice *pdev, > - void *opaque) > -{ > - /* Check if the device is VFIO PCI device */ > - if (!object_dynamic_cast(OBJECT(pdev), "vfio-pci")) { > - return; > - } > - > - /* > - * The MSIx table will be cleaned out by reset. We need > - * disable it so that it can be reenabled properly. Also, > - * the cached MSIx table should be cleared as it's not > - * reflecting the contents in hardware. > - */ > - if (msix_enabled(pdev)) { > - uint16_t flags; > - > - flags = pci_host_config_read_common(pdev, > - pdev->msix_cap + PCI_MSIX_FLAGS, > - pci_config_size(pdev), 2); > - flags &= ~PCI_MSIX_FLAGS_ENABLE; > - pci_host_config_write_common(pdev, > - pdev->msix_cap + PCI_MSIX_FLAGS, > - pci_config_size(pdev), flags, 2); > - } > - > - msix_reset(pdev); > -} > - > -static void spapr_phb_vfio_eeh_clear_bus_msix(PCIBus *bus, void *opaque) > -{ > - pci_for_each_device_under_bus(bus, spapr_phb_vfio_eeh_clear_dev_msix, > - NULL); > -} > - > -static void spapr_phb_vfio_eeh_pre_reset(SpaprPhbState *sphb) > -{ > - PCIHostState *phb = PCI_HOST_BRIDGE(sphb); > - > - pci_for_each_bus(phb->bus, spapr_phb_vfio_eeh_clear_bus_msix, NULL); > -} > - > -int spapr_phb_vfio_eeh_reset(SpaprPhbState *sphb, int option) > -{ > - uint32_t op; > - int ret; > - > - switch (option) { > - case RTAS_SLOT_RESET_DEACTIVATE: > - op = VFIO_EEH_PE_RESET_DEACTIVATE; > - break; > - case RTAS_SLOT_RESET_HOT: > - spapr_phb_vfio_eeh_pre_reset(sphb); > - op = VFIO_EEH_PE_RESET_HOT; > - break; > - case RTAS_SLOT_RESET_FUNDAMENTAL: > - spapr_phb_vfio_eeh_pre_reset(sphb); > - op = VFIO_EEH_PE_RESET_FUNDAMENTAL; > - break; > - default: > - return RTAS_OUT_PARAM_ERROR; > - } > - > - ret = vfio_eeh_as_op(&sphb->iommu_as, op); > - if (ret < 0) { > - return RTAS_OUT_HW_ERROR; > - } > - > - return RTAS_OUT_SUCCESS; > -} > - > -int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb) > -{ > - int ret; > - > - ret = vfio_eeh_as_op(&sphb->iommu_as, VFIO_EEH_PE_CONFIGURE); > - if (ret < 0) { > - return RTAS_OUT_PARAM_ERROR; > - } > - > - return RTAS_OUT_SUCCESS; > -} > - > -int spapr_phb_vfio_errinjct(SpaprPhbState *sphb, > - uint32_t func, uint64_t addr, > - uint64_t mask, uint32_t type) > -{ > - VFIOLegacyContainer *container = vfio_eeh_as_container(&sphb->iommu_as); > - struct vfio_eeh_pe_op op = { > - .op = VFIO_EEH_PE_INJECT_ERR, > - .argsz = sizeof(op), > - }; > - > - /* Set error type, address, and mask */ > - op.err.type = type; > - op.err.addr = addr; > - op.err.mask = mask; > - > - /* Validate and set function code */ > - switch (func) { > - case EEH_ERR_FUNC_LD_MEM_ADDR: > - case EEH_ERR_FUNC_LD_MEM_DATA: > - case EEH_ERR_FUNC_LD_IO_ADDR: > - case EEH_ERR_FUNC_LD_IO_DATA: > - case EEH_ERR_FUNC_LD_CFG_ADDR: > - case EEH_ERR_FUNC_LD_CFG_DATA: > - case EEH_ERR_FUNC_ST_MEM_ADDR: > - case EEH_ERR_FUNC_ST_MEM_DATA: > - case EEH_ERR_FUNC_ST_IO_ADDR: > - case EEH_ERR_FUNC_ST_IO_DATA: > - case EEH_ERR_FUNC_ST_CFG_ADDR: > - case EEH_ERR_FUNC_ST_CFG_DATA: > - case EEH_ERR_FUNC_DMA_RD_ADDR: > - case EEH_ERR_FUNC_DMA_RD_DATA: > - case EEH_ERR_FUNC_DMA_RD_MASTER: > - case EEH_ERR_FUNC_DMA_RD_TARGET: > - case EEH_ERR_FUNC_DMA_WR_ADDR: > - case EEH_ERR_FUNC_DMA_WR_DATA: > - case EEH_ERR_FUNC_DMA_WR_MASTER: > - op.err.func = func; > - break; > - default: > - return RTAS_OUT_PARAM_ERROR; > - } > - > - /* Perform the ioctl to inject the error */ > - if (ioctl(container->fd, VFIO_EEH_PE_OP, &op) < 0) { > - return RTAS_OUT_HW_ERROR; > - } > - > - return RTAS_OUT_SUCCESS; > -} > -#else > - > -bool spapr_phb_eeh_available(SpaprPhbState *sphb) > -{ > - return false; > -} > - > -void spapr_phb_vfio_reset(DeviceState *qdev) > -{ > -} > - > -int spapr_phb_vfio_eeh_set_option(SpaprPhbState *sphb, > - unsigned int addr, int option) > -{ > - return RTAS_OUT_NOT_SUPPORTED; > -} > - > -int spapr_phb_vfio_eeh_get_state(SpaprPhbState *sphb, int *state) > -{ > - return RTAS_OUT_NOT_SUPPORTED; > -} > - > -int spapr_phb_vfio_eeh_reset(SpaprPhbState *sphb, int option) > -{ > - return RTAS_OUT_NOT_SUPPORTED; > -} > - > -int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb) > -{ > - return RTAS_OUT_NOT_SUPPORTED; > -} > - > -int spapr_phb_vfio_errinjct(SpaprPhbState *sphb, int option) > -{ > - return RTAS_OUT_NOT_SUPPORTED; > -} > -#endif /* CONFIG_VFIO_PCI */ > diff --git a/hw/ppc/spapr_pci_vfio_eeh.c b/hw/ppc/spapr_pci_vfio_eeh.c > new file mode 100644 > index 0000000000..6d07ae50c5 > --- /dev/null > +++ b/hw/ppc/spapr_pci_vfio_eeh.c > @@ -0,0 +1,346 @@ > +/* SPDX-License-Identifier: GPL-2.0-or-later */ > + > +/* > + * QEMU sPAPR PCI VFIO EEH support > + */ > + > +#include "qemu/osdep.h" > +#include <sys/ioctl.h> > +#include <linux/vfio.h> > +#include "hw/ppc/spapr.h" > +#include "hw/pci-host/spapr.h" > +#include "hw/ppc/spapr_vfio.h" > +#include "hw/pci/msix.h" > +#include "hw/pci/pci_device.h" > +#include "hw/vfio/vfio-container-legacy.h" > +#include "qemu/error-report.h" > +#include CONFIG_DEVICES /* CONFIG_VFIO_PCI */
This include can now be removed, since associated ifdefs were already removed. With that, Reviewed-by: Pierrick Bouvier <[email protected]> Also, build was correctly fixed and series passes all tests also.👍 https://github.com/p-b-o/qemu-ci/actions/runs/26162641798 Regards, Pierrick
