On 6/26/24 02:28, Nicolin Chen wrote:
> With iommu=nested-smmuv3, there could be multiple nested SMMU instances in
> the vms. A passthrough device must look up its iommu handler in its
> sysfs node, and then link to the nested SMMU instance created for the same
> iommu handler. This isn't easy to do.
>
> Add an auto-assign piece after all vSMMU backed pxb buses are created. It
> loops over the existing input devices, and sets/replaces their pci bus numbers
> with a newly created pcie-root-port attached to the pxb bus.
Here again, I don't think it is acceptable to create such a topology under
the hood. Libvirt should be in control of the whole PCIe topology.

Eric
>
> Note that this is not an ideal solution for handling hot-plugged devices.
>
> Signed-off-by: Nicolin Chen <nicol...@nvidia.com>
> ---
>  hw/arm/virt.c         | 110 ++++++++++++++++++++++++++++++++++++++++++
>  include/hw/arm/virt.h |  13 +++++
>  2 files changed, 123 insertions(+)
>
> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> index a54332fca8..3610f53304 100644
> --- a/hw/arm/virt.c
> +++ b/hw/arm/virt.c
> @@ -38,6 +38,7 @@
>  #include "hw/arm/primecell.h"
>  #include "hw/arm/virt.h"
>  #include "hw/block/flash.h"
> +#include "hw/vfio/pci.h"
>  #include "hw/vfio/vfio-calxeda-xgmac.h"
>  #include "hw/vfio/vfio-amd-xgbe.h"
>  #include "hw/display/ramfb.h"
> @@ -1491,6 +1492,112 @@ static void 
> create_virtio_iommu_dt_bindings(VirtMachineState *vms)
>                             bdf + 1, vms->iommu_phandle, bdf + 1, 0xffff - 
> bdf);
>  }
>  
> +static char *create_new_pcie_port(VirtNestedSmmu *nested_smmu, Error **errp)
> +{
> +    uint32_t port_nr = nested_smmu->pci_bus->qbus.num_children;
> +    uint32_t chassis_nr = UINT8_MAX - nested_smmu->index;
> +    uint32_t bus_nr = pci_bus_num(nested_smmu->pci_bus);
> +    DeviceState *dev;
> +    char *name_port;
> +
> +    /* Create a root port */
> +    dev = qdev_new("pcie-root-port");
> +    name_port = g_strdup_printf("smmu_bus0x%x_port%d", bus_nr, port_nr);
> +
> +    if (!qdev_set_id(dev, name_port, &error_fatal)) {
> +        /* FIXME retry with a different port num? */
> +        error_setg(errp, "Could not set pcie-root-port ID %s", name_port);
> +        g_free(name_port);
> +        g_free(dev);
> +        return NULL;
> +    }
> +    qdev_prop_set_uint32(dev, "chassis", chassis_nr);
> +    qdev_prop_set_uint32(dev, "slot", port_nr);
> +    qdev_prop_set_uint64(dev, "io-reserve", 0);
> +    qdev_realize_and_unref(dev, BUS(nested_smmu->pci_bus), &error_fatal);
> +    return name_port;
> +}
> +
> +static int assign_nested_smmu(void *opaque, QemuOpts *opts, Error **errp)
> +{
> +    VirtMachineState *vms = (VirtMachineState *)opaque;
> +    const char *sysfsdev = qemu_opt_get(opts, "sysfsdev");
> +    const char *iommufd = qemu_opt_get(opts, "iommufd");
> +    const char *driver = qemu_opt_get(opts, "driver");
> +    const char *host = qemu_opt_get(opts, "host");
> +    const char *bus = qemu_opt_get(opts, "bus");
> +    VirtNestedSmmu *nested_smmu;
> +    char *link_iommu;
> +    char *dir_iommu;
> +    char *smmu_node;
> +    char *name_port;
> +    int ret = 0;
> +
> +    if (!iommufd || !driver) {
> +        return 0;
> +    }
> +    if (!sysfsdev && !host) {
> +        return 0;
> +    }
> +    if (strncmp(driver, TYPE_VFIO_PCI, strlen(TYPE_VFIO_PCI))) {
> +        return 0;
> +    }
> +    /* If the device wants to attach to the default bus, do not reassign it */
> +    if (bus && !strncmp(bus, "pcie.0", strlen(bus))) {
> +        return 0;
> +    }
> +
> +    if (sysfsdev) {
> +        link_iommu = g_strdup_printf("%s/iommu", sysfsdev);
> +    } else {
> +        link_iommu = g_strdup_printf("/sys/bus/pci/devices/%s/iommu", host);
> +    }
> +
> +    dir_iommu = realpath(link_iommu, NULL);
> +    if (!dir_iommu) {
> +        error_setg(errp, "Could not get the real path for iommu link: %s",
> +                   link_iommu);
> +        ret = -EINVAL;
> +        goto free_link;
> +    }
> +
> +    smmu_node = g_path_get_basename(dir_iommu);
> +    if (!smmu_node) {
> +        error_setg(errp, "Could not get SMMU node name for iommu at: %s",
> +                   dir_iommu);
> +        ret = -EINVAL;
> +        goto free_dir;
> +    }
> +
> +    nested_smmu = find_nested_smmu_by_sysfs(vms, smmu_node);
> +    if (!nested_smmu) {
> +        error_setg(errp, "Could not find any detected SMMU matching node: 
> %s",
> +                   smmu_node);
> +        ret = -EINVAL;
> +        goto free_node;
> +    }
> +
> +    name_port = create_new_pcie_port(nested_smmu, errp);
> +    if (!name_port) {
> +        ret = -EBUSY;
> +        goto free_node;
> +    }
> +
> +    qemu_opt_set(opts, "bus", name_port, &error_fatal);
> +    if (bus) {
> +        error_report("overriding PCI bus %s to %s for device %s [%s]",
> +                     bus, name_port, host, sysfsdev);
> +    }
> +
> +free_node:
> +    free(smmu_node);
> +free_dir:
> +    free(dir_iommu);
> +free_link:
> +    free(link_iommu);
> +    return ret;
> +}
> +
>  /*
>   * FIXME this is used to reserve for hotplug devices, yet it could result in
>   * a big waste of PCI bus numbers.
> @@ -1669,6 +1776,9 @@ static void create_pcie(VirtMachineState *vms)
>              qemu_fdt_setprop_cells(ms->fdt, nodename, "iommu-map", 0x0,
>                                     vms->nested_smmu_phandle[i], 0x0, 
> 0x10000);
>          }
> +
> +        qemu_opts_foreach(qemu_find_opts("device"),
> +                          assign_nested_smmu, vms, &error_fatal);
>      } else if (vms->iommu) {
>          vms->iommu_phandle = qemu_fdt_alloc_phandle(ms->fdt);
>  
> diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
> index 0a3f1ab8b5..dfbc4bba3c 100644
> --- a/include/hw/arm/virt.h
> +++ b/include/hw/arm/virt.h
> @@ -246,4 +246,17 @@ find_nested_smmu_by_index(VirtMachineState *vms, int 
> index)
>      return NULL;
>  }
>  
> +static inline VirtNestedSmmu *
> +find_nested_smmu_by_sysfs(VirtMachineState *vms, char *node)
> +{
> +    VirtNestedSmmu *nested_smmu;
> +
> +    QLIST_FOREACH(nested_smmu, &vms->nested_smmu_list, next) {
> +        if (!strncmp(nested_smmu->smmu_node, node, strlen(node))) {
> +            return nested_smmu;
> +        }
> +    }
> +    return NULL;
> +}
> +
>  #endif /* QEMU_ARM_VIRT_H */


Reply via email to