On 9/29/25 3:36 PM, Shameer Kolothum wrote:
> From: Nicolin Chen <[email protected]>
>
> Implement a set_iommu_device callback:
> -If found an existing viommu reuse that.
> -Else,
> Allocate a vIOMMU with the nested parent S2 hwpt allocated by VFIO.
> Though, iommufd’s vIOMMU model supports nested translation by
> encapsulating a S2 nesting parent HWPT, devices cannot attach to this
> parent HWPT directly. So two proxy nested HWPTs (bypass and abort) are
> allocated to handle device attachments.
> -And add the dev to viommu device list
>
> Also add an unset_iommu_device to unwind/cleanup above.
>
> Signed-off-by: Nicolin Chen <[email protected]>
> Signed-off-by: Shameer Kolothum <[email protected]>
> Signed-off-by: Shameer Kolothum <[email protected]>
> ---
> hw/arm/smmuv3-accel.c | 150 ++++++++++++++++++++++++++++++++++++++++
> hw/arm/smmuv3-accel.h | 17 +++++
> hw/arm/trace-events | 4 ++
> include/hw/arm/smmuv3.h | 1 +
> 4 files changed, 172 insertions(+)
>
> diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c
> index 6b0e512d86..81fa738f6f 100644
> --- a/hw/arm/smmuv3-accel.c
> +++ b/hw/arm/smmuv3-accel.c
> @@ -8,6 +8,7 @@
>
> #include "qemu/osdep.h"
> #include "qemu/error-report.h"
> +#include "trace.h"
>
> #include "hw/arm/smmuv3.h"
> #include "hw/iommu.h"
> @@ -17,6 +18,9 @@
>
> #include "smmuv3-accel.h"
>
> +#define SMMU_STE_VALID (1ULL << 0)
> +#define SMMU_STE_CFG_BYPASS (1ULL << 3)
> +
> static SMMUv3AccelDevice *smmuv3_accel_get_dev(SMMUState *bs, SMMUPciBus
> *sbus,
> PCIBus *bus, int devfn)
> {
> @@ -35,6 +39,149 @@ static SMMUv3AccelDevice *smmuv3_accel_get_dev(SMMUState
> *bs, SMMUPciBus *sbus,
> return accel_dev;
> }
>
> +static bool
> +smmuv3_accel_dev_alloc_viommu(SMMUv3AccelDevice *accel_dev,
> + HostIOMMUDeviceIOMMUFD *idev, Error **errp)
> +{
> + struct iommu_hwpt_arm_smmuv3 bypass_data = {
> + .ste = { SMMU_STE_CFG_BYPASS | SMMU_STE_VALID, 0x0ULL },
> + };
> + struct iommu_hwpt_arm_smmuv3 abort_data = {
> + .ste = { SMMU_STE_VALID, 0x0ULL },
> + };
> + SMMUDevice *sdev = &accel_dev->sdev;
> + SMMUState *bs = sdev->smmu;
> + SMMUv3State *s = ARM_SMMUV3(bs);
> + SMMUv3AccelState *s_accel = s->s_accel;
> + uint32_t s2_hwpt_id = idev->hwpt_id;
> + SMMUViommu *viommu;
> + uint32_t viommu_id;
> +
> + if (s_accel->viommu) {
> + accel_dev->viommu = s_accel->viommu;
> + return true;
> + }
> +
> + if (!iommufd_backend_alloc_viommu(idev->iommufd, idev->devid,
> + IOMMU_VIOMMU_TYPE_ARM_SMMUV3,
> + s2_hwpt_id, &viommu_id, errp)) {
> + return false;
> + }
> +
> + viommu = g_new0(SMMUViommu, 1);
> + viommu->core.viommu_id = viommu_id;
> + viommu->core.s2_hwpt_id = s2_hwpt_id;
> + viommu->core.iommufd = idev->iommufd;
> +
> + if (!iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid,
> + viommu->core.viommu_id, 0,
> + IOMMU_HWPT_DATA_ARM_SMMUV3,
> + sizeof(abort_data), &abort_data,
> + &viommu->abort_hwpt_id, errp)) {
> + goto free_viommu;
> + }
> +
> + if (!iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid,
> + viommu->core.viommu_id, 0,
> + IOMMU_HWPT_DATA_ARM_SMMUV3,
> + sizeof(bypass_data), &bypass_data,
> + &viommu->bypass_hwpt_id, errp)) {
> + goto free_abort_hwpt;
> + }
> +
> + viommu->iommufd = idev->iommufd;
> +
> + s_accel->viommu = viommu;
> + accel_dev->viommu = viommu;
> + return true;
> +
> +free_abort_hwpt:
> + iommufd_backend_free_id(idev->iommufd, viommu->abort_hwpt_id);
> +free_viommu:
> + iommufd_backend_free_id(idev->iommufd, viommu->core.viommu_id);
> + g_free(viommu);
> + return false;
> +}
> +
> +static bool smmuv3_accel_set_iommu_device(PCIBus *bus, void *opaque, int
> devfn,
> + HostIOMMUDevice *hiod, Error
> **errp)
> +{
> + HostIOMMUDeviceIOMMUFD *idev = HOST_IOMMU_DEVICE_IOMMUFD(hiod);
> + SMMUState *bs = opaque;
> + SMMUv3State *s = ARM_SMMUV3(bs);
> + SMMUv3AccelState *s_accel = s->s_accel;
> + SMMUPciBus *sbus = smmu_get_sbus(bs, bus);
> + SMMUv3AccelDevice *accel_dev = smmuv3_accel_get_dev(bs, sbus, bus,
> devfn);
> + SMMUDevice *sdev = &accel_dev->sdev;
> + uint16_t sid = smmu_get_sid(sdev);
> +
> + if (!idev) {
> + return true;
> + }
> +
> + if (accel_dev->idev) {
> + if (accel_dev->idev != idev) {
> + error_setg(errp, "Device 0x%x already has an associated IOMMU
> dev",
> + sid);
> + return false;
> + }
> + return true;
> + }
> +
> + if (!smmuv3_accel_dev_alloc_viommu(accel_dev, idev, errp)) {
> + error_setg(errp, "Device 0x%x: Unable to alloc viommu", sid);
> + return false;
> + }
> +
> + accel_dev->idev = idev;
> + QLIST_INSERT_HEAD(&s_accel->viommu->device_list, accel_dev, next);
> + trace_smmuv3_accel_set_iommu_device(devfn, sid);
> + return true;
> +}
> +
> +static void smmuv3_accel_unset_iommu_device(PCIBus *bus, void *opaque,
> + int devfn)
> +{
> + SMMUState *bs = opaque;
> + SMMUv3State *s = ARM_SMMUV3(bs);
> + SMMUPciBus *sbus = g_hash_table_lookup(bs->smmu_pcibus_by_busptr, bus);
> + SMMUv3AccelDevice *accel_dev;
> + SMMUViommu *viommu;
> + SMMUDevice *sdev;
> + uint16_t sid;
> +
> + if (!sbus) {
> + return;
> + }
> +
> + sdev = sbus->pbdev[devfn];
> + if (!sdev) {
> + return;
> + }
> +
> + sid = smmu_get_sid(sdev);
> + accel_dev = container_of(sdev, SMMUv3AccelDevice, sdev);
> + if (!host_iommu_device_iommufd_attach_hwpt(accel_dev->idev,
> + accel_dev->idev->hwpt_id,
> + NULL)) {
> + error_report("Unable to attach dev 0x%x to the default HW pagetable",
> + sid);
> + }
> +
> + accel_dev->idev = NULL;
> + QLIST_REMOVE(accel_dev, next);
> + trace_smmuv3_accel_unset_iommu_device(devfn, sid);
> +
> + viommu = s->s_accel->viommu;
> + if (QLIST_EMPTY(&viommu->device_list)) {
> + iommufd_backend_free_id(viommu->iommufd, viommu->bypass_hwpt_id);
> + iommufd_backend_free_id(viommu->iommufd, viommu->abort_hwpt_id);
> + iommufd_backend_free_id(viommu->iommufd, viommu->core.viommu_id);
> + g_free(viommu);
> + s->s_accel->viommu = NULL;
> + }
> +}
> +
> static bool smmuv3_accel_pdev_allowed(PCIDevice *pdev, bool *vfio_pci)
> {
>
> @@ -121,6 +268,8 @@ static uint64_t smmuv3_accel_get_viommu_flags(void
> *opaque)
> static const PCIIOMMUOps smmuv3_accel_ops = {
> .get_address_space = smmuv3_accel_find_add_as,
> .get_viommu_flags = smmuv3_accel_get_viommu_flags,
> + .set_iommu_device = smmuv3_accel_set_iommu_device,
> + .unset_iommu_device = smmuv3_accel_unset_iommu_device,
> };
>
> void smmuv3_accel_init(SMMUv3State *s)
> @@ -128,4 +277,5 @@ void smmuv3_accel_init(SMMUv3State *s)
> SMMUState *bs = ARM_SMMU(s);
>
> bs->iommu_ops = &smmuv3_accel_ops;
> + s->s_accel = g_new0(SMMUv3AccelState, 1);
> }
> diff --git a/hw/arm/smmuv3-accel.h b/hw/arm/smmuv3-accel.h
> index 70da16960f..3c8506d1e6 100644
> --- a/hw/arm/smmuv3-accel.h
> +++ b/hw/arm/smmuv3-accel.h
> @@ -10,12 +10,29 @@
> #define HW_ARM_SMMUV3_ACCEL_H
>
> #include "hw/arm/smmu-common.h"
> +#include "system/iommufd.h"
> +#include <linux/iommufd.h>
> #include CONFIG_DEVICES
>
> +typedef struct SMMUViommu {
> + IOMMUFDBackend *iommufd;
> + IOMMUFDViommu core;
Could we avoid overly generic field names like "core"? In the rest of
the code it is then difficult to understand what the field corresponds to.
Maybe "viommu" instead?
> + uint32_t bypass_hwpt_id;
> + uint32_t abort_hwpt_id;
> + QLIST_HEAD(, SMMUv3AccelDevice) device_list;
> +} SMMUViommu;
> +
> typedef struct SMMUv3AccelDevice {
> SMMUDevice sdev;
> + HostIOMMUDeviceIOMMUFD *idev;
Same here. "hdev" would at least refer to the host device. Or does "idev"
correspond to some kernel terminology?
Eric
> + SMMUViommu *viommu;
> + QLIST_ENTRY(SMMUv3AccelDevice) next;
> } SMMUv3AccelDevice;
>
> +typedef struct SMMUv3AccelState {
> + SMMUViommu *viommu;
> +} SMMUv3AccelState;
> +
> #ifdef CONFIG_ARM_SMMUV3_ACCEL
> void smmuv3_accel_init(SMMUv3State *s);
> #else
> diff --git a/hw/arm/trace-events b/hw/arm/trace-events
> index f3386bd7ae..86370d448a 100644
> --- a/hw/arm/trace-events
> +++ b/hw/arm/trace-events
> @@ -66,6 +66,10 @@ smmuv3_notify_flag_del(const char *iommu) "DEL
> SMMUNotifier node for iommu mr=%s
> smmuv3_inv_notifiers_iova(const char *name, int asid, int vmid, uint64_t
> iova, uint8_t tg, uint64_t num_pages, int stage) "iommu mr=%s asid=%d vmid=%d
> iova=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64" stage=%d"
> smmu_reset_exit(void) ""
>
> +#smmuv3-accel.c
> +smmuv3_accel_set_iommu_device(int devfn, uint32_t sid) "devfn=0x%x
> (sid=0x%x)"
> +smmuv3_accel_unset_iommu_device(int devfn, uint32_t sid) "devfn=0x%x
> (sid=0x%x)"
> +
> # strongarm.c
> strongarm_uart_update_parameters(const char *label, int speed, char parity,
> int data_bits, int stop_bits) "%s speed=%d parity=%c data=%d stop=%d"
> strongarm_ssp_read_underrun(void) "SSP rx underrun"
> diff --git a/include/hw/arm/smmuv3.h b/include/hw/arm/smmuv3.h
> index bb7076286b..5f3e9089a7 100644
> --- a/include/hw/arm/smmuv3.h
> +++ b/include/hw/arm/smmuv3.h
> @@ -66,6 +66,7 @@ struct SMMUv3State {
>
> /* SMMU has HW accelerator support for nested S1 + s2 */
> bool accel;
> + struct SMMUv3AccelState *s_accel;
> };
>
> typedef enum {