On Tue, 15 Mar 2016 09:35:45 +0800 Cao jin <caoj.f...@cn.fujitsu.com> wrote:
> From: Chen Fan <chen.fan.f...@cn.fujitsu.com> > > when boot up a VM that assigning vfio devices with aer enabled, we > must check the vfio device whether support host bus reset. because > when one error occur. OS driver always recover the device by do a > bus reset, in order to recover the vfio device, qemu must able to do > a host bus reset to recover the device to default status. and for all > affected devices by the bus reset. we must check them whether all > are assigned to the VM and on the same virtual bus. meanwhile, for > simply done, the devices which don't affected by the host bus reset > are not allowed to assign to the same virtual bus. > > Signed-off-by: Chen Fan <chen.fan.f...@cn.fujitsu.com> > --- > hw/vfio/pci.c | 219 > ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- > hw/vfio/pci.h | 1 + > 2 files changed, 213 insertions(+), 7 deletions(-) > > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > index a77cc04..9670271 100644 > --- a/hw/vfio/pci.c > +++ b/hw/vfio/pci.c > @@ -1868,6 +1868,197 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, > uint8_t pos) > return 0; > } > > +static bool vfio_pci_host_slot_match(PCIHostDeviceAddress *host1, > + PCIHostDeviceAddress *host2) > +{ > + return (host1->domain == host2->domain && host1->bus == host2->bus && > + host1->slot == host2->slot); > +} > + > +static bool vfio_pci_host_match(PCIHostDeviceAddress *host1, > + PCIHostDeviceAddress *host2) > +{ > + return (vfio_pci_host_slot_match(host1, host2) && > + host1->function == host2->function); > +} This needs to be rebased to current qemu.git and account for: 7df9381 vfio: Add sysfsdev property for pci & platform It's possible that vdev.host is not populated if the device is specified via sysfsdev. vbasedev.name is now the common data element and I think for the purposes of a bus reset it's ok to expect it to take the form of "%04x:%02x:%02x.%d". The current vfio_pci_host_match function builds a comparison string and does a strcmp. You may want to change that to build a PCIHostDeviceAddress to make masking out the function number easier for a slot comparison. If we have devices that don't match the "%04x:%02x:%02x.%d" format, the expectation is that they won't support a hot reset ioctl. > + > +static void vfio_check_hot_bus_reset(VFIOPCIDevice *vdev, Error **errp) > +{ > + PCIBus *bus = vdev->pdev.bus; > + struct vfio_pci_hot_reset_info *info = NULL; > + struct vfio_pci_dependent_device *devices; > + VFIOGroup *group; > + int ret, i, devfn; > + > + ret = vfio_get_hot_reset_info(vdev, &info); > + if (ret) { > + error_setg(errp, "vfio: Cannot enable AER for device %s," > + " device does not support hot reset.", > + vdev->vbasedev.name); > + return; > + } > + > + /* List all affected devices by bus reset */ > + devices = &info->devices[0]; > + > + /* Verify that we have all the groups required */ > + for (i = 0; i < info->count; i++) { > + PCIHostDeviceAddress host; > + VFIOPCIDevice *tmp; > + VFIODevice *vbasedev_iter; > + bool found = false; > + > + host.domain = devices[i].segment; > + host.bus = devices[i].bus; > + host.slot = PCI_SLOT(devices[i].devfn); > + host.function = PCI_FUNC(devices[i].devfn); > + > + /* Skip the current device */ > + if (vfio_pci_host_match(&host, &vdev->host)) { > + continue; > + } > + > + /* Ensure we own the group of the affected device */ > + QLIST_FOREACH(group, &vfio_group_list, next) { > + if (group->groupid == devices[i].group_id) { > + break; > + } > + } > + > + if (!group) { > + error_setg(errp, "vfio: Cannot enable AER for device %s, " > + "depends on group %d which is not owned.", > + vdev->vbasedev.name, devices[i].group_id); > + goto out; > + } > + > + /* Ensure affected devices for reset on the same bus */ > + QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { > + if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) { > + continue; > + } > + tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); > + if (vfio_pci_host_match(&host, &tmp->host)) { > + /* > + * AER errors may be broadcast to all functions of a multi- > + * function endpoint. If any of those sibling functions are > + * also assigned, they need to have AER enabled or else an > + * error may continue to cause a vm_stop condition. IOW, > + * AER setup of this function would be pointless. > + */ > + if (vfio_pci_host_slot_match(&vdev->host, &tmp->host) && > + !(tmp->features & VFIO_FEATURE_ENABLE_AER)) { > + error_setg(errp, "vfio: Cannot enable AER for device %s, > on same slot" > + " the dependent device %s which does not > enable AER.", > + vdev->vbasedev.name, tmp->vbasedev.name); > + goto out; > + } > + > + if (tmp->pdev.bus != bus) { > + error_setg(errp, "vfio: Cannot enable AER for device %s, > " > + "the dependent device %s is not on the same > bus", > + vdev->vbasedev.name, tmp->vbasedev.name); > + goto out; > + } > + found = true; > + break; > + } > + } > + > + /* Ensure all affected devices assigned to VM */ > + if (!found) { > + error_setg(errp, "vfio: Cannot enable AER for device %s, " > + "the dependent device %04x:%02x:%02x.%x " > + "is not assigned to VM.", > + vdev->vbasedev.name, host.domain, host.bus, > + host.slot, host.function); > + goto out; > + } > + } > + > + /* > + * To make things simply, functions must are combined > + * in the same way as on the host. no other irrelative > + * functions on the virtual same bus was required. > + */ > + for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) { > + VFIOPCIDevice *tmp; > + PCIDevice *dev; > + bool found = false; > + > + dev = bus->devices[devfn]; > + if (!dev) { > + continue; > + } bus->devices seems like private data for PCIBus, MST do you want some abstraction for this? Can't this already be done in a round about sort of way with pci_for_each_device()? > + > + if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) { > + error_setg(errp, "vfio: Cannot enable AER for device %s, " > + "only support all dependent devices on the same bus, " > + "%s is not one of the dependent devices.", > + vdev->vbasedev.name, dev->name); > + goto out; > + } > + > + tmp = DO_UPCAST(VFIOPCIDevice, pdev, dev); > + for (i = 0; i < info->count; i++) { > + PCIHostDeviceAddress host; > + > + host.domain = devices[i].segment; > + host.bus = devices[i].bus; > + host.slot = PCI_SLOT(devices[i].devfn); > + host.function = PCI_FUNC(devices[i].devfn); > + > + if (vfio_pci_host_match(&host, &tmp->host)) { > + found = true; > + break; > + } > + } > + > + if (!found) { > + error_setg(errp, "vfio: Cannot enable AER for device %s, " > + "only support all dependent devices on the same bus, " > + "%04x:%02x:%02x.%x is not one of the dependent > devices.", > + vdev->vbasedev.name, tmp->host.domain, tmp->host.bus, > + tmp->host.slot, tmp->host.function); > + goto out; > + } > + } > + > +out: > + g_free(info); > + return; > +} > + > +static void vfio_aer_check_host_bus_reset(Error **errp) > +{ > + VFIOGroup *group; > + VFIODevice *vbasedev; > + VFIOPCIDevice *vdev; > + Error *local_err = NULL; > + > + /* Check All vfio-pci devices if have bus reset capability */ > + QLIST_FOREACH(group, &vfio_group_list, next) { > + QLIST_FOREACH(vbasedev, &group->device_list, next) { > + if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) { > + continue; > + } > + vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); > + if (vdev->features & VFIO_FEATURE_ENABLE_AER) { > + if (!pci_get_function_0(&vdev->pdev)) { > + continue; > + } > + vfio_check_hot_bus_reset(vdev, &local_err); > + if (local_err) { > + error_propagate(errp, local_err); > + return; > + } > + } > + } > + } > + > + return; > +} > + > static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver, > int pos, uint16_t size) > { > @@ -2051,13 +2242,6 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev) > vfio_intx_enable(vdev); > } > > -static bool vfio_pci_host_match(PCIHostDeviceAddress *host1, > - PCIHostDeviceAddress *host2) > -{ > - return (host1->domain == host2->domain && host1->bus == host2->bus && > - host1->slot == host2->slot && host1->function == > host2->function); > -} > - > static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) > { > VFIOGroup *group; > @@ -2563,6 +2747,22 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice > *vdev) > vdev->req_enabled = false; > } > > +static void vfio_pci_machine_done_notify(Notifier *notifier, void *unused) > +{ > + Error *local_err = NULL; > + > + vfio_aer_check_host_bus_reset(&local_err); > + if (local_err) { > + fprintf(stderr, "%s\n", error_get_pretty(local_err)); > + error_free(local_err); > + exit(1); > + } > +} > + > +static Notifier machine_notifier = { > + .notify = vfio_pci_machine_done_notify, > +}; > + > static int vfio_initfn(PCIDevice *pdev) > { > VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); > @@ -2909,6 +3109,11 @@ static const TypeInfo vfio_pci_dev_info = { > static void register_vfio_pci_dev_type(void) > { > type_register_static(&vfio_pci_dev_info); > + /* > + * Register notifier when machine init is done, since we need > + * check the configration manner after all vfio device are inited. > + */ > + qemu_add_machine_init_done_notifier(&machine_notifier); > } > > type_init(register_vfio_pci_dev_type) > diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h > index e0c53f2..aff46c2 100644 > --- a/hw/vfio/pci.h > +++ b/hw/vfio/pci.h > @@ -15,6 +15,7 @@ > #include "qemu-common.h" > #include "exec/memory.h" > #include "hw/pci/pci.h" > +#include "hw/pci/pci_bus.h" > #include "hw/pci/pci_bridge.h" > #include "hw/vfio/vfio-common.h" > #include "qemu/event_notifier.h"