On 5/20/2026 2:54 AM, Narayana Murty N wrote:
> Split spapr_pci_vfio.c into two files to separate concerns:
> - spapr_pci_vfio.c: Contains general VFIO routines
> - spapr_pci_vfio_eeh.c: Contains EEH-specific routines
> 
> Additionally, consolidate VFIO EEH function declarations into a new
> header file (spapr_vfio.h) to improve modularity and reduce header
> dependencies.
> 
> Changes:
> - Split VFIO functionality: keep general VFIO routines in
>   spapr_pci_vfio.c and move EEH routines to spapr_pci_vfio_eeh.c
> - Created include/hw/ppc/spapr_vfio.h with forward declarations
>   to avoid pulling in full spapr headers and libfdt dependencies
> - Introduced stubs/spapr_pci_vfio-stubs.c to consolidate all VFIO,
>   VFIO EEH stub functions in one place
> - Updated hw/ppc/spapr_pci.c to include new spapr_vfio.h header
> - Updated stubs/meson.build to reference new stub file
> 
> This improves code organization by separating VFIO and EEH concerns,
> and enhances build system modularity by making it easier to maintain
> VFIO-related code separately from core sPAPR PCI code.
> 
> Signed-off-by: Narayana Murty N <[email protected]>
> ---
>  hw/ppc/Kconfig               |   2 +-
>  hw/ppc/meson.build           |   1 +
>  hw/ppc/spapr_pci.c           |   1 +
>  hw/ppc/spapr_pci_vfio.c      | 367 +----------------------------------
>  hw/ppc/spapr_pci_vfio_eeh.c  | 346 +++++++++++++++++++++++++++++++++
>  include/hw/pci-host/spapr.h  |  44 +----
>  include/hw/ppc/spapr_vfio.h  |  28 +++
>  stubs/meson.build            |   1 +
>  stubs/spapr_phb_vfio-stubs.c |  52 +++++
>  9 files changed, 432 insertions(+), 410 deletions(-)
>  create mode 100644 hw/ppc/spapr_pci_vfio_eeh.c
>  create mode 100644 include/hw/ppc/spapr_vfio.h
>  create mode 100644 stubs/spapr_phb_vfio-stubs.c
> 
> diff --git a/hw/ppc/Kconfig b/hw/ppc/Kconfig
> index 347dcce690..1fb191fe83 100644
> --- a/hw/ppc/Kconfig
> +++ b/hw/ppc/Kconfig
> @@ -6,7 +6,7 @@ config PSERIES
>      imply PCI_DEVICES
>      imply TEST_DEVICES
>      imply VIRTIO_VGA
> -    imply VFIO_PCI if LINUX   # needed by spapr_pci_vfio.c
> +    imply VFIO_PCI if LINUX   # needed by spapr_pci_vfio.c and 
> spapr_pci_vfio_eeh.c
>      select NVDIMM
>      select DIMM
>      select PCI
> diff --git a/hw/ppc/meson.build b/hw/ppc/meson.build
> index 37aa535db2..97e4be0dc9 100644
> --- a/hw/ppc/meson.build
> +++ b/hw/ppc/meson.build
> @@ -36,6 +36,7 @@ ppc_ss.add(when: 'CONFIG_SPAPR_RNG', if_true: 
> files('spapr_rng.c'))
>  if host_os == 'linux'
>    ppc_ss.add(when: 'CONFIG_PSERIES', if_true: files(
>      'spapr_pci_vfio.c',
> +    'spapr_pci_vfio_eeh.c',
>    ))
>  endif
>  
> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> index f08f21f03c..576b92229b 100644
> --- a/hw/ppc/spapr_pci.c
> +++ b/hw/ppc/spapr_pci.c
> @@ -33,6 +33,7 @@
>  #include "hw/pci/msix.h"
>  #include "hw/pci/pci_host.h"
>  #include "hw/ppc/spapr.h"
> +#include "hw/ppc/spapr_vfio.h"
>  #include "hw/pci-host/spapr.h"
>  #include <libfdt.h>
>  #include "trace.h"
> diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c
> index ed0b22a84a..2207654d83 100644
> --- a/hw/ppc/spapr_pci_vfio.c
> +++ b/hw/ppc/spapr_pci_vfio.c
> @@ -22,119 +22,11 @@
>  #include <linux/vfio.h>
>  #include "hw/ppc/spapr.h"
>  #include "hw/pci-host/spapr.h"
> +#include "hw/ppc/spapr_vfio.h"
>  #include "hw/pci/msix.h"
>  #include "hw/pci/pci_device.h"
>  #include "hw/vfio/vfio-container-legacy.h"
>  #include "qemu/error-report.h"
> -#include CONFIG_DEVICES /* CONFIG_VFIO_PCI */
> -
> -/*
> - * Interfaces for IBM EEH (Enhanced Error Handling)
> - */
> -#ifdef CONFIG_VFIO_PCI
> -static bool vfio_eeh_container_ok(VFIOLegacyContainer *container)
> -{
> -    /*
> -     * As of 2016-03-04 (linux-4.5) the host kernel EEH/VFIO
> -     * implementation is broken if there are multiple groups in a
> -     * container.  The hardware works in units of Partitionable
> -     * Endpoints (== IOMMU groups) and the EEH operations naively
> -     * iterate across all groups in the container, without any logic
> -     * to make sure the groups have their state synchronized.  For
> -     * certain operations (ENABLE) that might be ok, until an error
> -     * occurs, but for others (GET_STATE) it's clearly broken.
> -     */
> -
> -    /*
> -     * XXX Once fixed kernels exist, test for them here
> -     */
> -
> -    if (QLIST_EMPTY(&container->group_list)) {
> -        return false;
> -    }
> -
> -    if (QLIST_NEXT(QLIST_FIRST(&container->group_list), container_next)) {
> -        return false;
> -    }
> -
> -    return true;
> -}
> -
> -static int vfio_eeh_container_op(VFIOLegacyContainer *container, uint32_t op)
> -{
> -    struct vfio_eeh_pe_op pe_op = {
> -        .argsz = sizeof(pe_op),
> -        .op = op,
> -    };
> -    int ret;
> -
> -    if (!vfio_eeh_container_ok(container)) {
> -        error_report("vfio/eeh: EEH_PE_OP 0x%x: "
> -                     "kernel requires a container with exactly one group", 
> op);
> -        return -EPERM;
> -    }
> -
> -    ret = ioctl(container->fd, VFIO_EEH_PE_OP, &pe_op);
> -    if (ret < 0) {
> -        error_report("vfio/eeh: EEH_PE_OP 0x%x failed: %m", op);
> -        return -errno;
> -    }
> -
> -    return ret;
> -}
> -
> -static VFIOLegacyContainer *vfio_eeh_as_container(AddressSpace *as)
> -{
> -    VFIOAddressSpace *space = vfio_address_space_get(as);
> -    VFIOContainer *bcontainer = NULL;
> -
> -    if (QLIST_EMPTY(&space->containers)) {
> -        /* No containers to act on */
> -        goto out;
> -    }
> -
> -    bcontainer = QLIST_FIRST(&space->containers);
> -
> -    if (QLIST_NEXT(bcontainer, next)) {
> -        /*
> -         * We don't yet have logic to synchronize EEH state across
> -         * multiple containers
> -         */
> -        bcontainer = NULL;
> -        goto out;
> -    }
> -
> -out:
> -    vfio_address_space_put(space);
> -    return VFIO_IOMMU_LEGACY(bcontainer);
> -}
> -
> -static bool vfio_eeh_as_ok(AddressSpace *as)
> -{
> -    VFIOLegacyContainer *container = vfio_eeh_as_container(as);
> -
> -    return (container != NULL) && vfio_eeh_container_ok(container);
> -}
> -
> -static int vfio_eeh_as_op(AddressSpace *as, uint32_t op)
> -{
> -    VFIOLegacyContainer *container = vfio_eeh_as_container(as);
> -
> -    if (!container) {
> -        return -ENODEV;
> -    }
> -    return vfio_eeh_container_op(container, op);
> -}
> -
> -bool spapr_phb_eeh_available(SpaprPhbState *sphb)
> -{
> -    return vfio_eeh_as_ok(&sphb->iommu_as);
> -}
> -
> -static void spapr_phb_vfio_eeh_reenable(SpaprPhbState *sphb)
> -{
> -    vfio_eeh_as_op(&sphb->iommu_as, VFIO_EEH_PE_ENABLE);
> -}
>  
>  void spapr_phb_vfio_reset(DeviceState *qdev)
>  {
> @@ -146,260 +38,3 @@ void spapr_phb_vfio_reset(DeviceState *qdev)
>       */
>      spapr_phb_vfio_eeh_reenable(SPAPR_PCI_HOST_BRIDGE(qdev));
>  }
> -
> -static void spapr_eeh_pci_find_device(PCIBus *bus, PCIDevice *pdev,
> -                                      void *opaque)
> -{
> -    bool *found = opaque;
> -
> -    if (object_dynamic_cast(OBJECT(pdev), "vfio-pci")) {
> -        *found = true;
> -    }
> -}
> -
> -int spapr_phb_vfio_eeh_set_option(SpaprPhbState *sphb,
> -                                  unsigned int addr, int option)
> -{
> -    uint32_t op;
> -    int ret;
> -
> -    switch (option) {
> -    case RTAS_EEH_DISABLE:
> -        op = VFIO_EEH_PE_DISABLE;
> -        break;
> -    case RTAS_EEH_ENABLE: {
> -        PCIHostState *phb;
> -        bool found = false;
> -
> -        /*
> -         * The EEH functionality is enabled per sphb level instead of
> -         * per PCI device. We have already identified this specific sphb
> -         * based on buid passed as argument to ibm,set-eeh-option rtas
> -         * call. Now we just need to check the validity of the PCI
> -         * pass-through devices (vfio-pci) under this sphb bus.
> -         * We have already validated that all the devices under this sphb
> -         * are from same iommu group (within same PE) before coming here.
> -         *
> -         * Prior to linux commit 98ba956f6a389 ("powerpc/pseries/eeh:
> -         * Rework device EEH PE determination") kernel would call
> -         * eeh-set-option for each device in the PE using the device's
> -         * config_address as the argument rather than the PE address.
> -         * Hence if we check validity of supplied config_addr whether
> -         * it matches to this PHB will cause issues with older kernel
> -         * versions v5.9 and older. If we return an error from
> -         * eeh-set-option when the argument isn't a valid PE address
> -         * then older kernels (v5.9 and older) will interpret that as
> -         * EEH not being supported.
> -         */
> -        phb = PCI_HOST_BRIDGE(sphb);
> -        pci_for_each_device(phb->bus, (addr >> 16) & 0xFF,
> -                            spapr_eeh_pci_find_device, &found);
> -
> -        if (!found) {
> -            return RTAS_OUT_PARAM_ERROR;
> -        }
> -
> -        op = VFIO_EEH_PE_ENABLE;
> -        break;
> -    }
> -    case RTAS_EEH_THAW_IO:
> -        op = VFIO_EEH_PE_UNFREEZE_IO;
> -        break;
> -    case RTAS_EEH_THAW_DMA:
> -        op = VFIO_EEH_PE_UNFREEZE_DMA;
> -        break;
> -    default:
> -        return RTAS_OUT_PARAM_ERROR;
> -    }
> -
> -    ret = vfio_eeh_as_op(&sphb->iommu_as, op);
> -    if (ret < 0) {
> -        return RTAS_OUT_HW_ERROR;
> -    }
> -
> -    return RTAS_OUT_SUCCESS;
> -}
> -
> -int spapr_phb_vfio_eeh_get_state(SpaprPhbState *sphb, int *state)
> -{
> -    int ret;
> -
> -    ret = vfio_eeh_as_op(&sphb->iommu_as, VFIO_EEH_PE_GET_STATE);
> -    if (ret < 0) {
> -        return RTAS_OUT_PARAM_ERROR;
> -    }
> -
> -    *state = ret;
> -    return RTAS_OUT_SUCCESS;
> -}
> -
> -static void spapr_phb_vfio_eeh_clear_dev_msix(PCIBus *bus,
> -                                              PCIDevice *pdev,
> -                                              void *opaque)
> -{
> -    /* Check if the device is VFIO PCI device */
> -    if (!object_dynamic_cast(OBJECT(pdev), "vfio-pci")) {
> -        return;
> -    }
> -
> -    /*
> -     * The MSIx table will be cleaned out by reset. We need
> -     * disable it so that it can be reenabled properly. Also,
> -     * the cached MSIx table should be cleared as it's not
> -     * reflecting the contents in hardware.
> -     */
> -    if (msix_enabled(pdev)) {
> -        uint16_t flags;
> -
> -        flags = pci_host_config_read_common(pdev,
> -                                            pdev->msix_cap + PCI_MSIX_FLAGS,
> -                                            pci_config_size(pdev), 2);
> -        flags &= ~PCI_MSIX_FLAGS_ENABLE;
> -        pci_host_config_write_common(pdev,
> -                                     pdev->msix_cap + PCI_MSIX_FLAGS,
> -                                     pci_config_size(pdev), flags, 2);
> -    }
> -
> -    msix_reset(pdev);
> -}
> -
> -static void spapr_phb_vfio_eeh_clear_bus_msix(PCIBus *bus, void *opaque)
> -{
> -       pci_for_each_device_under_bus(bus, spapr_phb_vfio_eeh_clear_dev_msix,
> -                                     NULL);
> -}
> -
> -static void spapr_phb_vfio_eeh_pre_reset(SpaprPhbState *sphb)
> -{
> -       PCIHostState *phb = PCI_HOST_BRIDGE(sphb);
> -
> -       pci_for_each_bus(phb->bus, spapr_phb_vfio_eeh_clear_bus_msix, NULL);
> -}
> -
> -int spapr_phb_vfio_eeh_reset(SpaprPhbState *sphb, int option)
> -{
> -    uint32_t op;
> -    int ret;
> -
> -    switch (option) {
> -    case RTAS_SLOT_RESET_DEACTIVATE:
> -        op = VFIO_EEH_PE_RESET_DEACTIVATE;
> -        break;
> -    case RTAS_SLOT_RESET_HOT:
> -        spapr_phb_vfio_eeh_pre_reset(sphb);
> -        op = VFIO_EEH_PE_RESET_HOT;
> -        break;
> -    case RTAS_SLOT_RESET_FUNDAMENTAL:
> -        spapr_phb_vfio_eeh_pre_reset(sphb);
> -        op = VFIO_EEH_PE_RESET_FUNDAMENTAL;
> -        break;
> -    default:
> -        return RTAS_OUT_PARAM_ERROR;
> -    }
> -
> -    ret = vfio_eeh_as_op(&sphb->iommu_as, op);
> -    if (ret < 0) {
> -        return RTAS_OUT_HW_ERROR;
> -    }
> -
> -    return RTAS_OUT_SUCCESS;
> -}
> -
> -int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb)
> -{
> -    int ret;
> -
> -    ret = vfio_eeh_as_op(&sphb->iommu_as, VFIO_EEH_PE_CONFIGURE);
> -    if (ret < 0) {
> -        return RTAS_OUT_PARAM_ERROR;
> -    }
> -
> -    return RTAS_OUT_SUCCESS;
> -}
> -
> -int spapr_phb_vfio_errinjct(SpaprPhbState *sphb,
> -                            uint32_t func, uint64_t addr,
> -                            uint64_t mask, uint32_t type)
> -{
> -    VFIOLegacyContainer *container = vfio_eeh_as_container(&sphb->iommu_as);
> -    struct vfio_eeh_pe_op op = {
> -        .op = VFIO_EEH_PE_INJECT_ERR,
> -        .argsz = sizeof(op),
> -    };
> -
> -    /* Set error type, address, and mask */
> -    op.err.type = type;
> -    op.err.addr = addr;
> -    op.err.mask = mask;
> -
> -    /* Validate and set function code */
> -    switch (func) {
> -    case EEH_ERR_FUNC_LD_MEM_ADDR:
> -    case EEH_ERR_FUNC_LD_MEM_DATA:
> -    case EEH_ERR_FUNC_LD_IO_ADDR:
> -    case EEH_ERR_FUNC_LD_IO_DATA:
> -    case EEH_ERR_FUNC_LD_CFG_ADDR:
> -    case EEH_ERR_FUNC_LD_CFG_DATA:
> -    case EEH_ERR_FUNC_ST_MEM_ADDR:
> -    case EEH_ERR_FUNC_ST_MEM_DATA:
> -    case EEH_ERR_FUNC_ST_IO_ADDR:
> -    case EEH_ERR_FUNC_ST_IO_DATA:
> -    case EEH_ERR_FUNC_ST_CFG_ADDR:
> -    case EEH_ERR_FUNC_ST_CFG_DATA:
> -    case EEH_ERR_FUNC_DMA_RD_ADDR:
> -    case EEH_ERR_FUNC_DMA_RD_DATA:
> -    case EEH_ERR_FUNC_DMA_RD_MASTER:
> -    case EEH_ERR_FUNC_DMA_RD_TARGET:
> -    case EEH_ERR_FUNC_DMA_WR_ADDR:
> -    case EEH_ERR_FUNC_DMA_WR_DATA:
> -    case EEH_ERR_FUNC_DMA_WR_MASTER:
> -        op.err.func = func;
> -        break;
> -    default:
> -        return RTAS_OUT_PARAM_ERROR;
> -    }
> -
> -    /* Perform the ioctl to inject the error */
> -    if (ioctl(container->fd, VFIO_EEH_PE_OP, &op) < 0) {
> -        return RTAS_OUT_HW_ERROR;
> -    }
> -
> -    return RTAS_OUT_SUCCESS;
> -}
> -#else
> -
> -bool spapr_phb_eeh_available(SpaprPhbState *sphb)
> -{
> -    return false;
> -}
> -
> -void spapr_phb_vfio_reset(DeviceState *qdev)
> -{
> -}
> -
> -int spapr_phb_vfio_eeh_set_option(SpaprPhbState *sphb,
> -                                  unsigned int addr, int option)
> -{
> -    return RTAS_OUT_NOT_SUPPORTED;
> -}
> -
> -int spapr_phb_vfio_eeh_get_state(SpaprPhbState *sphb, int *state)
> -{
> -    return RTAS_OUT_NOT_SUPPORTED;
> -}
> -
> -int spapr_phb_vfio_eeh_reset(SpaprPhbState *sphb, int option)
> -{
> -    return RTAS_OUT_NOT_SUPPORTED;
> -}
> -
> -int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb)
> -{
> -    return RTAS_OUT_NOT_SUPPORTED;
> -}
> -
> -int spapr_phb_vfio_errinjct(SpaprPhbState *sphb, int option)
> -{
> -    return RTAS_OUT_NOT_SUPPORTED;
> -}
> -#endif /* CONFIG_VFIO_PCI */
> diff --git a/hw/ppc/spapr_pci_vfio_eeh.c b/hw/ppc/spapr_pci_vfio_eeh.c
> new file mode 100644
> index 0000000000..6d07ae50c5
> --- /dev/null
> +++ b/hw/ppc/spapr_pci_vfio_eeh.c
> @@ -0,0 +1,346 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later */
> +
> +/*
> + * QEMU sPAPR PCI VFIO EEH support
> + */
> +
> +#include "qemu/osdep.h"
> +#include <sys/ioctl.h>
> +#include <linux/vfio.h>
> +#include "hw/ppc/spapr.h"
> +#include "hw/pci-host/spapr.h"
> +#include "hw/ppc/spapr_vfio.h"
> +#include "hw/pci/msix.h"
> +#include "hw/pci/pci_device.h"
> +#include "hw/vfio/vfio-container-legacy.h"
> +#include "qemu/error-report.h"
> +#include CONFIG_DEVICES /* CONFIG_VFIO_PCI */

This include can now be removed, since associated ifdefs were already
removed.

With that,
Reviewed-by: Pierrick Bouvier <[email protected]>

Also, build was correctly fixed and series passes all tests also.👍
https://github.com/p-b-o/qemu-ci/actions/runs/26162641798

Regards,
Pierrick

Reply via email to