On Tue, 2015-06-02 at 15:54 +0800, Chen Fan wrote:
> On 05/28/2015 05:32 AM, Alex Williamson wrote:
> > On Wed, 2015-05-27 at 10:46 +0800, Chen Fan wrote:
> >> we introduce a has_bus_reset capability to sign the vfio
> >> devices if support host bus reset.
> >>
> >> Signed-off-by: Chen Fan <chen.fan.f...@cn.fujitsu.com>
> >> ---
> >>   hw/vfio/pci.c | 123 
> >> ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> >>   1 file changed, 123 insertions(+)
> >>
> >> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> >> index f4e7855..5934fd7 100644
> >> --- a/hw/vfio/pci.c
> >> +++ b/hw/vfio/pci.c
> >> @@ -33,6 +33,7 @@
> >>   #include "hw/pci/msix.h"
> >>   #include "hw/pci/pci.h"
> >>   #include "hw/pci/pci_bridge.h"
> >> +#include "hw/pci/pci_bus.h"
> >>   #include "qemu-common.h"
> >>   #include "qemu/error-report.h"
> >>   #include "qemu/event_notifier.h"
> >> @@ -170,6 +171,7 @@ typedef struct VFIOPCIDevice {
> >>       bool req_enabled;
> >>       bool has_flr;
> >>       bool has_pm_reset;
> >> +    bool has_bus_reset;
> > I still think that caching this is a bad idea, there's no point at which
> > we can blindly assume the capability is still present.
> >
> >>       bool rom_read_failed;
> >>   } VFIOPCIDevice;
> >>   
> >> @@ -203,6 +205,7 @@ static uint32_t vfio_pci_read_config(PCIDevice *pdev, 
> >> uint32_t addr, int len);
> >>   static void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr,
> >>                                     uint32_t val, int len);
> >>   static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled);
> >> +static void vfio_check_host_bus_reset(VFIOPCIDevice *vdev);
> >>   
> >>   /*
> >>    * Disabling BAR mmaping can be slow, but toggling it around INTx can
> >> @@ -2853,6 +2856,20 @@ static int vfio_setup_aer(VFIOPCIDevice *vdev, int 
> >> pos, uint16_t size)
> >>           dev_iter = pci_bridge_get_device(dev_iter->bus);
> >>       }
> >>   
> >> +    /*
> >> +     * Don't check bus reset capability when device is enabled during
> >> +     * qemu machine creation, which is done by machine init function.
> >> +     */
> >> +    if (DEVICE(vdev)->hotplugged) {
> >> +        vfio_check_host_bus_reset(vdev);
> >> +        if (!vdev->has_bus_reset) {
> >> +            error_report("vfio: Cannot enable AER for device %s, "
> >> +                         "which is not support host bus reset.",
> > "which does not support host bus reset."
> >
> >> +                         vdev->vbasedev.name);
> >> +            goto error;
> >> +        }
> >> +    }
> >> +
> >>       errcap = vfio_pci_read_config(pdev, pdev->exp.aer_cap + PCI_ERR_CAP, 
> >> 4);
> >>       /*
> >>        * The ability to record multiple headers is depending on
> >> @@ -3678,6 +3695,112 @@ static void vfio_setup_resetfn(VFIOPCIDevice *vdev)
> >>       }
> >>   }
> >>   
> >> +struct VfioDeviceFind {
> > We use VFIOFooBar for all other camel case definitions, much like PCIBus
> > and PCIDevice below.
> >
> >> +    PCIBus *pbus;
> >> +    PCIDevice *pdev;
> >> +    bool found;
> >> +};
> >> +
> >> +static void find_devices(PCIBus *bus, void *opaque)
> >> +{
> >> +    struct VfioDeviceFind *find = opaque;
> >> +    int i;
> >> +
> >> +    if (find->found == true) {
> > if (find->found) {...
> >
> >> +        return;
> >> +    }
> >> +
> >> +    for (i = 0; i < ARRAY_SIZE(bus->devices); i++) {
> >> +        if (!bus->devices[i]) {
> >> +            continue;
> >> +        }
> >> +
> >> +        if (bus->devices[i] == find->pdev) {
> >> +            find->pbus = bus;
> >> +            find->found = true;
> >> +            break;
> >> +        }
> >> +    }
> >> +}
> >> +
> >> +static void vfio_check_host_bus_reset(VFIOPCIDevice *vdev)
> >> +{
> >> +    PCIBus *bus = vdev->pdev.bus;
> >> +    struct vfio_pci_hot_reset_info *info = NULL;
> >> +    struct vfio_pci_dependent_device *devices;
> >> +    VFIOGroup *group;
> >> +    int ret, i;
> >> +    bool has_bus_reset = false;
> >> +
> >> +    ret = vfio_get_hot_reset_info(vdev, &info);
> >> +    if (ret < 0) {
> > if (ret) {...
> >
> >> +        goto out;
> >> +    }
> >> +
> >> +    /* List all affected devices by bus reset */
> >> +    devices = &info->devices[0];
> >> +
> >> +    /* Verify that we have all the groups required */
> >> +    for (i = 0; i < info->count; i++) {
> >> +        PCIHostDeviceAddress host;
> >> +        VFIOPCIDevice *tmp;
> >> +        VFIODevice *vbasedev_iter;
> >> +
> >> +        host.domain = devices[i].segment;
> >> +        host.bus = devices[i].bus;
> >> +        host.slot = PCI_SLOT(devices[i].devfn);
> >> +        host.function = PCI_FUNC(devices[i].devfn);
> >> +
> >> +        /* Skip the current device */
> >> +        if (vfio_pci_host_match(&host, &vdev->host)) {
> >> +            continue;
> >> +        }
> >> +
> >> +        /* Ensure we own the group of the affected device */
> >> +        QLIST_FOREACH(group, &vfio_group_list, next) {
> >> +            if (group->groupid == devices[i].group_id) {
> >> +                break;
> >> +            }
> >> +        }
> >> +
> >> +        if (!group) {
> >> +            goto out;
> >> +        }
> >> +
> >> +        /* Ensure affected devices for reset under the same bus */
> >> +        QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
> >> +            if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
> >> +                continue;
> >> +            }
> >> +            tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
> >> +            if (vfio_pci_host_match(&host, &tmp->host)) {
> >> +                struct VfioDeviceFind find = { .pdev = &tmp->pdev, .found 
> >> = false };
> >> +
> >> +                pci_for_each_bus(bus, find_devices, &find);
> >> +                if (!find.found) {
> >> +                    goto out;
> >> +                }
> >> +                /*
> >> +                 * When the check device is hotplugged to a higher bus 
> >> again,
> >> +                 * which would influence the affected device which enable 
> >> aer
> >> +                 * below the bus.
> >> +                 */
> >> +                if (tmp->features & VFIO_FEATURE_ENABLE_AER &&
> >> +                    find.pbus != bus) {
> >> +                    goto out;
> >> +                }
> > I think what you're trying to do here is assume that if a reset of A
> > affects B, then a reset of B affects A, and if B is on a subordinate bus
> > from A, then our configuration is broken, right?  Can we really
> > guarantee that assumption?  If we had a physical topology that mirrored
> > this virtual topology, that wouldn't necessarily be true.  For instance,
> > if A was a function of a multi-function device where another function
> > was a PCIe upstream switch, B could be subordinate to that switch, so a
> > reset of A affects B, but a reset of B doesn't affect A.
> >
> >> +                break;
> >> +            }
> >> +        }
> >> +    }
> >> +
> >> +    has_bus_reset = true;
> >> +
> >> +out:
> >> +    vdev->has_bus_reset = has_bus_reset;
> > I don't see any value is storing this, it can't be trusted at any point
> > in the future.  I think that any time we add a device or think about
> > forwarding an AER message to the guest, we need to do a validation pass,
> > testing the entire topology.
> >
> > To do that we'd iterate through every device in every group for PCI
> > devices that support AER.  For each one, get the hot reset info for the
> > affected device list.  For each affected device, if it's attached to the
> > VM, it needs to be on or below the bus of the target device.
> > Additionally, we should test all of the non-AER supporting vfio-pci
> > devices on or below the target bus to verify they have a reset
> > mechanism.  Run that function for each vfio-pci device as it's added,
> > regardless of whether it's hotplug.  If the test fails, fail the initfn
> > for the current device.  Also run the test prior to AER injection, if it
> > fails demote the AER injection to a machine halt as we have now.
> I'm worry about is the case that the affected devices belonged to
> another groups but when initialize this device the another group
> has not been added. it would cause fail the initfn, but the group
> maybe added later. so I used the machine done event notifier to
> check this case. if we don't do like that. how can we check the
> case when all vfio-pci devices initfn ?

That's why the initfn test needs to test the entire topology, not just
the device being added.  If we have the case you describe where we have
two devices in separate groups, we add the first device, test the
topology, see that the second device is affected but not yet added and
allow AER to be enabled.  When the second device is added, we again test
the topology, we see the potential conflict and fail the initfn for the
second device if the bus requirements are not met.

I don't see the advantage of using a machine init done notifier.  You
can perform fewer topology verification passes using that notifier, but
I think you can provide a more useful error message by testing on each
device addition.  We also use the exact same strategy for cold-plug and
hot-plug, which makes maintenance easier.  Thanks,

Alex


Reply via email to