On 7/16/25 09:31, Sairaj Kodilkar wrote:
> The AMD IOMMU is set up at boot time and uses PCI bus numbers + devfn
> for indexing into the DTE. The problem is that before the guest has
> started, all PCI bus numbers are 0 as no PCI discovery has happened yet
> (the BIOS and/or the kernel will do that later), so relying on the bus
> number is wrong. The immediate effect is that emulated devices cannot
> do DMA when placed on a bus other than 0.
> 
> Replace static array of address_space with hash table which uses devfn and
> PCIBus* for key as it is not going to change after the guest is booted.
> 
> Co-developed-by: Alexey Kardashevskiy <a...@amd.com>
> Signed-off-by: Alexey Kardashevskiy <a...@amd.com>
> Signed-off-by: Sairaj Kodilkar <sarun...@amd.com>
> ---
>  hw/i386/amd_iommu.c | 124 +++++++++++++++++++++++++++-----------------
>  hw/i386/amd_iommu.h |   2 +-
>  2 files changed, 76 insertions(+), 50 deletions(-)
> 
> diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
> index a34062153194..33916b458611 100644
> --- a/hw/i386/amd_iommu.c
> +++ b/hw/i386/amd_iommu.c
> @@ -59,7 +59,7 @@ const char *amdvi_mmio_high[] = {
>  };
> 
>  struct AMDVIAddressSpace {
> -    uint8_t bus_num;            /* bus number                           */
> +    PCIBus *bus;                /* PCIBus (for bus number)              */
>      uint8_t devfn;              /* device function                      */
>      AMDVIState *iommu_state;    /* AMDVI - one per machine              */
>      MemoryRegion root;          /* AMDVI Root memory map region         */
> @@ -101,6 +101,11 @@ typedef enum AMDVIFaultReason {
>      AMDVI_FR_PT_ENTRY_INV,      /* Failure to read PTE from guest memory */
>  } AMDVIFaultReason;
> 
> +typedef struct amdvi_as_key {
> +    PCIBus *bus;
> +    int devfn;
> +} amdvi_as_key;
> +
>  uint64_t amdvi_extended_feature_register(AMDVIState *s)
>  {
>      uint64_t feature = AMDVI_DEFAULT_EXT_FEATURES;
> @@ -360,6 +365,42 @@ static guint amdvi_uint64_hash(gconstpointer v)
>      return (guint)*(const uint64_t *)v;
>  }
> 
> +static gboolean amdvi_as_equal(gconstpointer v1, gconstpointer v2)
> +{
> +    const struct amdvi_as_key *key1 = v1;
> +    const struct amdvi_as_key *key2 = v2;
> +
> +    return key1->bus == key2->bus && key1->devfn == key2->devfn;
> +}
> +
> +static guint amdvi_as_hash(gconstpointer v)
> +{
> +    const struct amdvi_as_key *key = v;
> +    return (guint)((uint64_t)key->bus | (key->devfn << 24));

I think it should at least be an XOR, but a hash similar to the
Intel one is probably preferable:

return (guint)(((uintptr_t)key->bus << 8) | key->devfn);

> +}
> +
> +static AMDVIAddressSpace *amdvi_as_lookup(AMDVIState *s, PCIBus *bus,
> +                                          int devfn)
> +{
> +    amdvi_as_key key = { .bus = bus, .devfn = devfn };
> +    return g_hash_table_lookup(s->address_spaces, &key);
> +}
> +
> +static int amdvi_find_as_by_devid(gpointer key, gpointer value,
> +                                  gpointer user_data)
> +{
> +    amdvi_as_key *as = (struct amdvi_as_key *)key;
> +    uint16_t devid = *((uint16_t *)user_data);
> +
> +    return devid == PCI_BUILD_BDF(pci_bus_num(as->bus), as->devfn);
> +}
> +
> +static AMDVIAddressSpace *amdvi_get_as_by_devid(AMDVIState *s, uint16_t 
> devid)
> +{
> +    return g_hash_table_find(s->address_spaces,
> +                             amdvi_find_as_by_devid, &devid);
> +}
> +
>  static AMDVIIOTLBEntry *amdvi_iotlb_lookup(AMDVIState *s, hwaddr addr,
>                                             uint64_t devid)
>  {
> @@ -530,7 +571,7 @@ static inline uint64_t amdvi_get_pte_entry(AMDVIState *s, 
> uint64_t pte_addr,
> 
>  static int amdvi_as_to_dte(AMDVIAddressSpace *as, uint64_t *dte)
>  {
> -    uint16_t devid = PCI_BUILD_BDF(as->bus_num, as->devfn);
> +    uint16_t devid = PCI_BUILD_BDF(pci_bus_num(as->bus), as->devfn);
>      AMDVIState *s = as->iommu_state;
> 
>      if (!amdvi_get_dte(s, devid, dte)) {
> @@ -983,23 +1024,13 @@ static void 
> amdvi_switch_address_space(AMDVIAddressSpace *amdvi_as)
>   */
>  static void amdvi_switch_address_space_all(AMDVIState *s)
>  {
> -    AMDVIAddressSpace **iommu_as;
> -
> -    for (int bus_num = 0; bus_num < PCI_BUS_MAX; bus_num++) {
> -
> -        /* Nothing to do if there are no devices on the current bus */
> -        if (!s->address_spaces[bus_num]) {
> -            continue;
> -        }
> -        iommu_as = s->address_spaces[bus_num];
> +    AMDVIAddressSpace *iommu_as;
> +    GHashTableIter as_it;
> 
> -        for (int devfn = 0; devfn < PCI_DEVFN_MAX; devfn++) {
> +    g_hash_table_iter_init(&as_it, s->address_spaces);
> 
> -            if (!iommu_as[devfn]) {
> -                continue;
> -            }
> -            amdvi_switch_address_space(iommu_as[devfn]);
> -        }
> +    while (g_hash_table_iter_next(&as_it, NULL, (void **)&iommu_as)) {
> +            amdvi_switch_address_space(iommu_as);
>      }
>  }
> 
> @@ -1012,28 +1043,22 @@ static void amdvi_switch_address_space_all(AMDVIState 
> *s)
>   */
>  static void amdvi_update_addr_translation_mode(AMDVIState *s, uint16_t devid)
>  {
> -    uint8_t bus_num, devfn, dte_mode;
> +    uint8_t dte_mode;
>      AMDVIAddressSpace *as;
>      uint64_t dte[4] = { 0 };
>      IOMMUNotifier *n;
>      int ret;
> 
> -    /*
> -     * Convert the devid encoded in the command to a bus and devfn in
> -     * order to retrieve the corresponding address space.
> -     */
> -    bus_num = PCI_BUS_NUM(devid);
> -    devfn = devid & 0xff;
> -
>      /*
>       * The main buffer of size (AMDVIAddressSpace *) * (PCI_BUS_MAX) has 
> already
>       * been allocated within AMDVIState, but must be careful to not access
>       * unallocated devfn.
>       */
> -    if (!s->address_spaces[bus_num] || !s->address_spaces[bus_num][devfn]) {
> +
> +    as = amdvi_get_as_by_devid(s, devid);
> +    if (!as) {
>          return;
>      }
> -    as = s->address_spaces[bus_num][devfn];
> 
>      ret = amdvi_as_to_dte(as, dte);
> 
> @@ -1699,7 +1724,7 @@ static void amdvi_do_translate(AMDVIAddressSpace *as, 
> hwaddr addr,
>                                 bool is_write, IOMMUTLBEntry *ret)
>  {
>      AMDVIState *s = as->iommu_state;
> -    uint16_t devid = PCI_BUILD_BDF(as->bus_num, as->devfn);
> +    uint16_t devid = PCI_BUILD_BDF(pci_bus_num(as->bus), as->devfn);
>      AMDVIIOTLBEntry *iotlb_entry = amdvi_iotlb_lookup(s, addr, devid);
>      uint64_t entry[4];
>      int dte_ret;
> @@ -1773,7 +1798,7 @@ static IOMMUTLBEntry amdvi_translate(IOMMUMemoryRegion 
> *iommu, hwaddr addr,
>      }
> 
>      amdvi_do_translate(as, addr, flag & IOMMU_WO, &ret);
> -    trace_amdvi_translation_result(as->bus_num, PCI_SLOT(as->devfn),
> +    trace_amdvi_translation_result(pci_bus_num(as->bus), PCI_SLOT(as->devfn),
>              PCI_FUNC(as->devfn), addr, ret.translated_addr);
>      return ret;
>  }
> @@ -2137,30 +2162,28 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus 
> *bus, void *opaque, int devfn)
>  {
>      char name[128];
>      AMDVIState *s = opaque;
> -    AMDVIAddressSpace **iommu_as, *amdvi_dev_as;
> -    int bus_num = pci_bus_num(bus);
> +    AMDVIAddressSpace *amdvi_dev_as;
> +    amdvi_as_key *key;
> 
> -    iommu_as = s->address_spaces[bus_num];
> +    amdvi_dev_as = amdvi_as_lookup(s, bus, devfn);
> 
>      /* allocate memory during the first run */
> -    if (!iommu_as) {
> -        iommu_as = g_new0(AMDVIAddressSpace *, PCI_DEVFN_MAX);
> -        s->address_spaces[bus_num] = iommu_as;
> -    }
> -
> -    /* set up AMD-Vi region */
> -    if (!iommu_as[devfn]) {
> +    if (!amdvi_dev_as) {
>          snprintf(name, sizeof(name), "amd_iommu_devfn_%d", devfn);
> 
> -        iommu_as[devfn] = g_new0(AMDVIAddressSpace, 1);
> -        iommu_as[devfn]->bus_num = (uint8_t)bus_num;
> -        iommu_as[devfn]->devfn = (uint8_t)devfn;
> -        iommu_as[devfn]->iommu_state = s;
> -        iommu_as[devfn]->notifier_flags = IOMMU_NONE;

s/IOMMU_NONE/IOMMU_NOTIFIER_NONE/ (in the new amdvi_dev_as->notifier_flags
assignment below)

Thanks,
Ethan

> -        iommu_as[devfn]->iova_tree = iova_tree_new();
> -        iommu_as[devfn]->addr_translation = false;
> +        amdvi_dev_as = g_new0(AMDVIAddressSpace, 1);
> +        key = g_new0(amdvi_as_key, 1);
> 
> -        amdvi_dev_as = iommu_as[devfn];
> +        amdvi_dev_as->bus = bus;
> +        amdvi_dev_as->devfn = (uint8_t)devfn;
> +        amdvi_dev_as->iommu_state = s;
> +        amdvi_dev_as->notifier_flags = IOMMU_NONE;
> +        amdvi_dev_as->iova_tree = iova_tree_new();
> +        amdvi_dev_as->addr_translation = false;
> +        key->bus = bus;
> +        key->devfn = devfn;
> +
> +        g_hash_table_insert(s->address_spaces, key, amdvi_dev_as);
> 
>          /*
>           * Memory region relationships looks like (Address range shows
> @@ -2203,7 +2226,7 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, 
> void *opaque, int devfn)
> 
>          amdvi_switch_address_space(amdvi_dev_as);
>      }
> -    return &iommu_as[devfn]->as;
> +    return &amdvi_dev_as->as;
>  }
> 
>  static const PCIIOMMUOps amdvi_iommu_ops = {
> @@ -2244,7 +2267,7 @@ static int 
> amdvi_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu,
>      if (!s->dma_remap && (new & IOMMU_NOTIFIER_MAP)) {
>          error_setg_errno(errp, ENOTSUP,
>                  "device %02x.%02x.%x requires dma-remap=1",
> -                as->bus_num, PCI_SLOT(as->devfn), PCI_FUNC(as->devfn));
> +                pci_bus_num(as->bus), PCI_SLOT(as->devfn), 
> PCI_FUNC(as->devfn));
>          return -ENOTSUP;
>      }
> 
> @@ -2353,6 +2376,9 @@ static void amdvi_sysbus_realize(DeviceState *dev, 
> Error **errp)
>      s->iotlb = g_hash_table_new_full(amdvi_uint64_hash,
>                                       amdvi_uint64_equal, g_free, g_free);
> 
> +    s->address_spaces = g_hash_table_new_full(amdvi_as_hash,
> +                                     amdvi_as_equal, g_free, g_free);
> +
>      /* This device should take care of IOMMU PCI properties */
>      if (!qdev_realize(DEVICE(&s->pci), &bus->qbus, errp)) {
>          return;
> diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h
> index 3dd4e7e3e8b8..37a57c4dd553 100644
> --- a/hw/i386/amd_iommu.h
> +++ b/hw/i386/amd_iommu.h
> @@ -409,7 +409,7 @@ struct AMDVIState {
>      bool mmio_enabled;
> 
>      /* for each served device */
> -    AMDVIAddressSpace **address_spaces[PCI_BUS_MAX];
> +    GHashTable *address_spaces;
> 
>      /* list of address spaces with registered notifiers */
>      QLIST_HEAD(, AMDVIAddressSpace) amdvi_as_with_notifiers;
> --
> 2.34.1
> 
> 

Reply via email to