On 7/16/25 09:31, Sairaj Kodilkar wrote: > The AMD IOMMU is set up at boot time and uses PCI bus numbers + devfn > for indexing into DTE. The problem is that before the guest started, > all PCI bus numbers are 0 as no PCI discovery happened yet (BIOS or/and > kernel will do that later) so relying on the bus number is wrong. > The immediate effect is emulated devices cannot do DMA when places on > a bus other that 0. > > Replace static array of address_space with hash table which uses devfn and > PCIBus* for key as it is not going to change after the guest is booted. > > Co-developed-by: Alexey Kardashevskiy <a...@amd.com> > Signed-off-by: Alexey Kardashevskiy <a...@amd.com> > Signed-off-by: Sairaj Kodilkar <sarun...@amd.com> > --- > hw/i386/amd_iommu.c | 124 +++++++++++++++++++++++++++----------------- > hw/i386/amd_iommu.h | 2 +- > 2 files changed, 76 insertions(+), 50 deletions(-) > > diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c > index a34062153194..33916b458611 100644 > --- a/hw/i386/amd_iommu.c > +++ b/hw/i386/amd_iommu.c > @@ -59,7 +59,7 @@ const char *amdvi_mmio_high[] = { > }; > > struct AMDVIAddressSpace { > - uint8_t bus_num; /* bus number */ > + PCIBus *bus; /* PCIBus (for bus number) */ > uint8_t devfn; /* device function */ > AMDVIState *iommu_state; /* AMDVI - one per machine */ > MemoryRegion root; /* AMDVI Root memory map region */ > @@ -101,6 +101,11 @@ typedef enum AMDVIFaultReason { > AMDVI_FR_PT_ENTRY_INV, /* Failure to read PTE from guest memory */ > } AMDVIFaultReason; > > +typedef struct amdvi_as_key { > + PCIBus *bus; > + int devfn; > +} amdvi_as_key; > + > uint64_t amdvi_extended_feature_register(AMDVIState *s) > { > uint64_t feature = AMDVI_DEFAULT_EXT_FEATURES; > @@ -360,6 +365,42 @@ static guint amdvi_uint64_hash(gconstpointer v) > return (guint)*(const uint64_t *)v; > } > > +static gboolean amdvi_as_equal(gconstpointer v1, gconstpointer v2) > +{ > + const struct amdvi_as_key *key1 = v1; > + const struct amdvi_as_key *key2 = 
v2; > + > + return key1->bus == key2->bus && key1->devfn == key2->devfn; > +} > + > +static guint amdvi_as_hash(gconstpointer v) > +{ > + const struct amdvi_as_key *key = v; > + return (guint)((uint64_t)key->bus | (key->devfn << 24));
I think it should at least be a xor, but a hash similar to the Intel one is probably preferable: return (guint)(((uintptr_t)key->bus << 8) | key->devfn); > +} > + > +static AMDVIAddressSpace *amdvi_as_lookup(AMDVIState *s, PCIBus *bus, > + int devfn) > +{ > + amdvi_as_key key = { .bus = bus, .devfn = devfn }; > + return g_hash_table_lookup(s->address_spaces, &key); > +} > + > +static int amdvi_find_as_by_devid(gpointer key, gpointer value, > + gpointer user_data) > +{ > + amdvi_as_key *as = (struct amdvi_as_key *)key; > + uint16_t devid = *((uint16_t *)user_data); > + > + return devid == PCI_BUILD_BDF(pci_bus_num(as->bus), as->devfn); > +} > + > +static AMDVIAddressSpace *amdvi_get_as_by_devid(AMDVIState *s, uint16_t > devid) > +{ > + return g_hash_table_find(s->address_spaces, > + amdvi_find_as_by_devid, &devid); > +} > + > static AMDVIIOTLBEntry *amdvi_iotlb_lookup(AMDVIState *s, hwaddr addr, > uint64_t devid) > { > @@ -530,7 +571,7 @@ static inline uint64_t amdvi_get_pte_entry(AMDVIState *s, > uint64_t pte_addr, > > static int amdvi_as_to_dte(AMDVIAddressSpace *as, uint64_t *dte) > { > - uint16_t devid = PCI_BUILD_BDF(as->bus_num, as->devfn); > + uint16_t devid = PCI_BUILD_BDF(pci_bus_num(as->bus), as->devfn); > AMDVIState *s = as->iommu_state; > > if (!amdvi_get_dte(s, devid, dte)) { > @@ -983,23 +1024,13 @@ static void > amdvi_switch_address_space(AMDVIAddressSpace *amdvi_as) > */ > static void amdvi_switch_address_space_all(AMDVIState *s) > { > - AMDVIAddressSpace **iommu_as; > - > - for (int bus_num = 0; bus_num < PCI_BUS_MAX; bus_num++) { > - > - /* Nothing to do if there are no devices on the current bus */ > - if (!s->address_spaces[bus_num]) { > - continue; > - } > - iommu_as = s->address_spaces[bus_num]; > + AMDVIAddressSpace *iommu_as; > + GHashTableIter as_it; > > - for (int devfn = 0; devfn < PCI_DEVFN_MAX; devfn++) { > + g_hash_table_iter_init(&as_it, s->address_spaces); > > - if (!iommu_as[devfn]) { > - continue; > - } > - 
amdvi_switch_address_space(iommu_as[devfn]); > - } > + while (g_hash_table_iter_next(&as_it, NULL, (void **)&iommu_as)) { > + amdvi_switch_address_space(iommu_as); > } > } > > @@ -1012,28 +1043,22 @@ static void amdvi_switch_address_space_all(AMDVIState > *s) > */ > static void amdvi_update_addr_translation_mode(AMDVIState *s, uint16_t devid) > { > - uint8_t bus_num, devfn, dte_mode; > + uint8_t dte_mode; > AMDVIAddressSpace *as; > uint64_t dte[4] = { 0 }; > IOMMUNotifier *n; > int ret; > > - /* > - * Convert the devid encoded in the command to a bus and devfn in > - * order to retrieve the corresponding address space. > - */ > - bus_num = PCI_BUS_NUM(devid); > - devfn = devid & 0xff; > - > /* > * The main buffer of size (AMDVIAddressSpace *) * (PCI_BUS_MAX) has > already > * been allocated within AMDVIState, but must be careful to not access > * unallocated devfn. > */ > - if (!s->address_spaces[bus_num] || !s->address_spaces[bus_num][devfn]) { > + > + as = amdvi_get_as_by_devid(s, devid); > + if (!as) { > return; > } > - as = s->address_spaces[bus_num][devfn]; > > ret = amdvi_as_to_dte(as, dte); > > @@ -1699,7 +1724,7 @@ static void amdvi_do_translate(AMDVIAddressSpace *as, > hwaddr addr, > bool is_write, IOMMUTLBEntry *ret) > { > AMDVIState *s = as->iommu_state; > - uint16_t devid = PCI_BUILD_BDF(as->bus_num, as->devfn); > + uint16_t devid = PCI_BUILD_BDF(pci_bus_num(as->bus), as->devfn); > AMDVIIOTLBEntry *iotlb_entry = amdvi_iotlb_lookup(s, addr, devid); > uint64_t entry[4]; > int dte_ret; > @@ -1773,7 +1798,7 @@ static IOMMUTLBEntry amdvi_translate(IOMMUMemoryRegion > *iommu, hwaddr addr, > } > > amdvi_do_translate(as, addr, flag & IOMMU_WO, &ret); > - trace_amdvi_translation_result(as->bus_num, PCI_SLOT(as->devfn), > + trace_amdvi_translation_result(pci_bus_num(as->bus), PCI_SLOT(as->devfn), > PCI_FUNC(as->devfn), addr, ret.translated_addr); > return ret; > } > @@ -2137,30 +2162,28 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus > *bus, void *opaque, int 
devfn) > { > char name[128]; > AMDVIState *s = opaque; > - AMDVIAddressSpace **iommu_as, *amdvi_dev_as; > - int bus_num = pci_bus_num(bus); > + AMDVIAddressSpace *amdvi_dev_as; > + amdvi_as_key *key; > > - iommu_as = s->address_spaces[bus_num]; > + amdvi_dev_as = amdvi_as_lookup(s, bus, devfn); > > /* allocate memory during the first run */ > - if (!iommu_as) { > - iommu_as = g_new0(AMDVIAddressSpace *, PCI_DEVFN_MAX); > - s->address_spaces[bus_num] = iommu_as; > - } > - > - /* set up AMD-Vi region */ > - if (!iommu_as[devfn]) { > + if (!amdvi_dev_as) { > snprintf(name, sizeof(name), "amd_iommu_devfn_%d", devfn); > > - iommu_as[devfn] = g_new0(AMDVIAddressSpace, 1); > - iommu_as[devfn]->bus_num = (uint8_t)bus_num; > - iommu_as[devfn]->devfn = (uint8_t)devfn; > - iommu_as[devfn]->iommu_state = s; > - iommu_as[devfn]->notifier_flags = IOMMU_NONE; s/IOMMU_NONE/IOMMU_NOTIFIER_NONE Thanks, Ethan > - iommu_as[devfn]->iova_tree = iova_tree_new(); > - iommu_as[devfn]->addr_translation = false; > + amdvi_dev_as = g_new0(AMDVIAddressSpace, 1); > + key = g_new0(amdvi_as_key, 1); > > - amdvi_dev_as = iommu_as[devfn]; > + amdvi_dev_as->bus = bus; > + amdvi_dev_as->devfn = (uint8_t)devfn; > + amdvi_dev_as->iommu_state = s; > + amdvi_dev_as->notifier_flags = IOMMU_NONE; > + amdvi_dev_as->iova_tree = iova_tree_new(); > + amdvi_dev_as->addr_translation = false; > + key->bus = bus; > + key->devfn = devfn; > + > + g_hash_table_insert(s->address_spaces, key, amdvi_dev_as); > > /* > * Memory region relationships looks like (Address range shows > @@ -2203,7 +2226,7 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, > void *opaque, int devfn) > > amdvi_switch_address_space(amdvi_dev_as); > } > - return &iommu_as[devfn]->as; > + return &amdvi_dev_as->as; > } > > static const PCIIOMMUOps amdvi_iommu_ops = { > @@ -2244,7 +2267,7 @@ static int > amdvi_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu, > if (!s->dma_remap && (new & IOMMU_NOTIFIER_MAP)) { > error_setg_errno(errp, 
ENOTSUP, > "device %02x.%02x.%x requires dma-remap=1", > - as->bus_num, PCI_SLOT(as->devfn), PCI_FUNC(as->devfn)); > + pci_bus_num(as->bus), PCI_SLOT(as->devfn), > PCI_FUNC(as->devfn)); > return -ENOTSUP; > } > > @@ -2353,6 +2376,9 @@ static void amdvi_sysbus_realize(DeviceState *dev, > Error **errp) > s->iotlb = g_hash_table_new_full(amdvi_uint64_hash, > amdvi_uint64_equal, g_free, g_free); > > + s->address_spaces = g_hash_table_new_full(amdvi_as_hash, > + amdvi_as_equal, g_free, g_free); > + > /* This device should take care of IOMMU PCI properties */ > if (!qdev_realize(DEVICE(&s->pci), &bus->qbus, errp)) { > return; > diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h > index 3dd4e7e3e8b8..37a57c4dd553 100644 > --- a/hw/i386/amd_iommu.h > +++ b/hw/i386/amd_iommu.h > @@ -409,7 +409,7 @@ struct AMDVIState { > bool mmio_enabled; > > /* for each served device */ > - AMDVIAddressSpace **address_spaces[PCI_BUS_MAX]; > + GHashTable *address_spaces; > > /* list of address spaces with registered notifiers */ > QLIST_HEAD(, AMDVIAddressSpace) amdvi_as_with_notifiers; > -- > 2.34.1 > >