On 6/26/24 02:28, Nicolin Chen wrote: > With iommu=nested-smmuv3, there could be multiple nested SMMU instances in > the vms. A passthrough device must look up its iommu handler in its > sysfs node, and then link to the nested SMMU instance created for the same > iommu handler. This isn't easy to do. > > Add an auto-assign piece after all vSMMU backed pxb buses are created. It > loops the existing input devices, and sets/replaces their pci bus numbers > with a newly created pcie-root-port to the pxb bus. Here again I don't think it is acceptable to create such topology under the hood. Libvirt should be in control of the whole PCIe topology. Eric > > Note that this is not an ideal solution to handle hot-plug devices. > > Signed-off-by: Nicolin Chen <nicol...@nvidia.com> > --- > hw/arm/virt.c | 110 ++++++++++++++++++++++++++++++++++++++++++ > include/hw/arm/virt.h | 13 +++++ > 2 files changed, 123 insertions(+) > > diff --git a/hw/arm/virt.c b/hw/arm/virt.c > index a54332fca8..3610f53304 100644 > --- a/hw/arm/virt.c > +++ b/hw/arm/virt.c > @@ -38,6 +38,7 @@ > #include "hw/arm/primecell.h" > #include "hw/arm/virt.h" > #include "hw/block/flash.h" > +#include "hw/vfio/pci.h" > #include "hw/vfio/vfio-calxeda-xgmac.h" > #include "hw/vfio/vfio-amd-xgbe.h" > #include "hw/display/ramfb.h" > @@ -1491,6 +1492,112 @@ static void > create_virtio_iommu_dt_bindings(VirtMachineState *vms) > bdf + 1, vms->iommu_phandle, bdf + 1, 0xffff - > bdf); > } > > +static char *create_new_pcie_port(VirtNestedSmmu *nested_smmu, Error **errp) > +{ > + uint32_t port_nr = nested_smmu->pci_bus->qbus.num_children; > + uint32_t chassis_nr = UINT8_MAX - nested_smmu->index; > + uint32_t bus_nr = pci_bus_num(nested_smmu->pci_bus); > + DeviceState *dev; > + char *name_port; > + > + /* Create a root port */ > + dev = qdev_new("pcie-root-port"); > + name_port = g_strdup_printf("smmu_bus0x%x_port%d", bus_nr, port_nr); > + > + if (!qdev_set_id(dev, name_port, &error_fatal)) { > + /* FIXME retry with a different 
port num? */ > + error_setg(errp, "Could not set pcie-root-port ID %s", name_port); > + g_free(name_port); > + g_free(dev); > + return NULL; > + } > + qdev_prop_set_uint32(dev, "chassis", chassis_nr); > + qdev_prop_set_uint32(dev, "slot", port_nr); > + qdev_prop_set_uint64(dev, "io-reserve", 0); > + qdev_realize_and_unref(dev, BUS(nested_smmu->pci_bus), &error_fatal); > + return name_port; > +} > + > +static int assign_nested_smmu(void *opaque, QemuOpts *opts, Error **errp) > +{ > + VirtMachineState *vms = (VirtMachineState *)opaque; > + const char *sysfsdev = qemu_opt_get(opts, "sysfsdev"); > + const char *iommufd = qemu_opt_get(opts, "iommufd"); > + const char *driver = qemu_opt_get(opts, "driver"); > + const char *host = qemu_opt_get(opts, "host"); > + const char *bus = qemu_opt_get(opts, "bus"); > + VirtNestedSmmu *nested_smmu; > + char *link_iommu; > + char *dir_iommu; > + char *smmu_node; > + char *name_port; > + int ret = 0; > + > + if (!iommufd || !driver) { > + return 0; > + } > + if (!sysfsdev && !host) { > + return 0; > + } > + if (strncmp(driver, TYPE_VFIO_PCI, strlen(TYPE_VFIO_PCI))) { > + return 0; > + } > + /* If the device wants to attach to the default bus, do not reassign it > */ > + if (bus && !strncmp(bus, "pcie.0", strlen(bus))) { > + return 0; > + } > + > + if (sysfsdev) { > + link_iommu = g_strdup_printf("%s/iommu", sysfsdev); > + } else { > + link_iommu = g_strdup_printf("/sys/bus/pci/devices/%s/iommu", host); > + } > + > + dir_iommu = realpath(link_iommu, NULL); > + if (!dir_iommu) { > + error_setg(errp, "Could not get the real path for iommu link: %s", > + link_iommu); > + ret = -EINVAL; > + goto free_link; > + } > + > + smmu_node = g_path_get_basename(dir_iommu); > + if (!smmu_node) { > + error_setg(errp, "Could not get SMMU node name for iommu at: %s", > + dir_iommu); > + ret = -EINVAL; > + goto free_dir; > + } > + > + nested_smmu = find_nested_smmu_by_sysfs(vms, smmu_node); > + if (!nested_smmu) { > + error_setg(errp, "Could not find 
any detected SMMU matching node: > %s", > + smmu_node); > + ret = -EINVAL; > + goto free_node; > + } > + > + name_port = create_new_pcie_port(nested_smmu, errp); > + if (!name_port) { > + ret = -EBUSY; > + goto free_node; > + } > + > + qemu_opt_set(opts, "bus", name_port, &error_fatal); > + if (bus) { > + error_report("overriding PCI bus %s to %s for device %s [%s]", > + bus, name_port, host, sysfsdev); > + } > + > +free_node: > + free(smmu_node); > +free_dir: > + free(dir_iommu); > +free_link: > + free(link_iommu); > + return ret; > +} > + > /* > * FIXME this is used to reverse for hotplug devices, yet it could result in > a > * big waste of PCI bus numbers. > @@ -1669,6 +1776,9 @@ static void create_pcie(VirtMachineState *vms) > qemu_fdt_setprop_cells(ms->fdt, nodename, "iommu-map", 0x0, > vms->nested_smmu_phandle[i], 0x0, > 0x10000); > } > + > + qemu_opts_foreach(qemu_find_opts("device"), > + assign_nested_smmu, vms, &error_fatal); > } else if (vms->iommu) { > vms->iommu_phandle = qemu_fdt_alloc_phandle(ms->fdt); > > diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h > index 0a3f1ab8b5..dfbc4bba3c 100644 > --- a/include/hw/arm/virt.h > +++ b/include/hw/arm/virt.h > @@ -246,4 +246,17 @@ find_nested_smmu_by_index(VirtMachineState *vms, int > index) > return NULL; > } > > +static inline VirtNestedSmmu * > +find_nested_smmu_by_sysfs(VirtMachineState *vms, char *node) > +{ > + VirtNestedSmmu *nested_smmu; > + > + QLIST_FOREACH(nested_smmu, &vms->nested_smmu_list, next) { > + if (!strncmp(nested_smmu->smmu_node, node, strlen(node))) { > + return nested_smmu; > + } > + } > + return NULL; > +} > + > #endif /* QEMU_ARM_VIRT_H */