On Fri, Oct 31, 2025 at 10:49:46AM +0000, Shameer Kolothum wrote:
> +static bool
> +smmuv3_accel_alloc_vdev(SMMUv3AccelDevice *accel_dev, int sid, Error **errp)
> +{
> +    SMMUViommu *vsmmu = accel_dev->vsmmu;
> +    IOMMUFDVdev *vdev;
> +    uint32_t vdevice_id;
> +
> +    if (!accel_dev->idev || accel_dev->vdev) {
> +        return true;
> +    }

We probably don't need to check !accel_dev->dev. It should have
been blocked by its caller, which does block !accel_dev->vsmmu.
Once we fix the missing "accel_dev->vsmmu NULL", it should work.

> +
> +    if (!iommufd_backend_alloc_vdev(vsmmu->iommufd, accel_dev->idev->devid,
> +                                    vsmmu->viommu.viommu_id, sid,
> +                                    &vdevice_id, errp)) {
> +            return false;
> +    }
> +    if (!host_iommu_device_iommufd_attach_hwpt(accel_dev->idev,
> +                                               vsmmu->bypass_hwpt_id, errp)) 
> {
> +        iommufd_backend_free_id(vsmmu->iommufd, vdevice_id);
> +        return false;
> +    }

This should check SMMUEN bit?

Linux driver (as an example) seems to set CMDQEN and install all
the default bypass STEs, before SMMUEN=1.

In this case, the target hwpt here should follow guest's GBPA.

> +static bool
> +smmuv3_accel_dev_uninstall_nested_ste(SMMUv3AccelDevice *accel_dev, bool 
> abort,
> +                                      Error **errp)
> +{
> +    HostIOMMUDeviceIOMMUFD *idev = accel_dev->idev;
> +    SMMUS1Hwpt *s1_hwpt = accel_dev->s1_hwpt;
> +    uint32_t hwpt_id;
> +
> +    if (!s1_hwpt || !accel_dev->vsmmu) {
> +        return true;
> +    }
> +
> +    if (abort) {
> +        hwpt_id = accel_dev->vsmmu->abort_hwpt_id;
> +    } else {
> +        hwpt_id = accel_dev->vsmmu->bypass_hwpt_id;
> +    }

This should probably check SMMUEN/GBPA as well.

Likely we need "enabled" and "gbpa_abort" flags in SMMUState.

> +static bool
> +smmuv3_accel_dev_install_nested_ste(SMMUv3AccelDevice *accel_dev,
> +                                    uint32_t data_type, uint32_t data_len,
> +                                    void *data, Error **errp)
> +{
> +    SMMUViommu *vsmmu = accel_dev->vsmmu;
> +    SMMUS1Hwpt *s1_hwpt = accel_dev->s1_hwpt;
> +    HostIOMMUDeviceIOMMUFD *idev = accel_dev->idev;
> +    uint32_t flags = 0;
> +
> +    if (!idev || !vsmmu) {
> +        error_setg(errp, "Device 0x%x has no associated IOMMU dev or vIOMMU",
> +                   smmu_get_sid(&accel_dev->sdev));
> +        return false;
> +    }
> +
> +    if (s1_hwpt) {
> +        if (!smmuv3_accel_dev_uninstall_nested_ste(accel_dev, true, errp)) {
> +            return false;
> +        }
> +    }

I think we could have some improvements here.

The current flow is:
    (attached to s1_hwpt1)
    attach to bypass/abort_hwpt // no issue though.
    free s1_hwpt1
    alloc s2_hwpt2
    attach to s2_hwpt2

It could have been a flow like replace() in the kernel:
    (attached to s1_hwpt1)
    alloc s2_hwpt2
    attach to s2_hwpt2 /* skipping bypass/abort */
    free s1_hwpt

> +smmuv3_accel_install_nested_ste(SMMUv3State *s, SMMUDevice *sdev, int sid,\
[...]
> +    config = STE_CONFIG(&ste);
> +    if (!STE_VALID(&ste) || !STE_CFG_S1_ENABLED(config)) {
> +        if (!smmuv3_accel_dev_uninstall_nested_ste(accel_dev,
> +                                                   STE_CFG_ABORT(config),

This smmuv3_accel_uninstall_nested_ste() feels a bit redundant now.

Perhaps we could try something like this:

#define accel_dev_to_smmuv3(dev) ARM_SMMUV3(&dev->sdev.smmu)

static bool smmuv3_accel_dev_install_nested_ste(SMMUv3AccelDevice *accel_dev,
                                                int sid, STE *ste)
{
    SMMUv3State *s = accel_dev_to_smmuv3(accel_dev);
    HostIOMMUDeviceIOMMUFD *idev = accel_dev->idev;
    uint32_t config = STE_CONFIG(ste);
    SMMUS1Hwpt *s1_hwpt = NULL;
    uint64_t ste_0, ste_1;
    uint32_t hwpt_id = 0;

    if (!s->enabled) {
        if (s->gbpa_abort) {
            hwpt_id = accel_dev->vsmmu->abort_hwpt_id;
        } else {
            hwpt_id = accel_dev->vsmmu->bypass_hwpt_id;
        }
    } else {
        if (!STE_VALID(ste) || STE_CFG_ABORT(config)) {
            hwpt_id = accel_dev->vsmmu->abort_hwpt_id;
        } else if (STE_CFG_BYPASS(config))
            hwpt_id = accel_dev->vsmmu->bypass_hwpt_id;
        } else {
            // FIXME handle STE_CFG_S2_ENABLED()
        }
    }

    if (!hwpt_id) {
        uint64_t ste_0 = (uint64_t)ste->word[0] | (uint64_t)ste->word[1] << 32;
        uint64_t ste_1 = (uint64_t)ste->word[2] | (uint64_t)ste->word[3] << 32;
        struct iommu_hwpt_arm_smmuv3 nested_data = {
            .ste[2] = {
                cpu_to_le64(ste_0 & STE0_MASK),
                cpu_to_le64(ste_1 & STE1_MASK),
            },
        };

        trace_smmuv3_accel_install_nested_ste(sid, nested_data.ste[1],
                                              nested_data.ste[0]);
        s1_hwpt = g_new0(SMMUS1Hwpt, 1);
        [...]
        iommufd_backend_alloc_hwpt(..., &s1_hwpt->hwpt_id);
        hwpt_id = s1_hwpt->hwpt_id;
    }

    host_iommu_device_iommufd_attach_hwpt(.., hwpt_id);

    if (accel_dev->s1_hwpt) {
        iommufd_backend_free_id(idev->iommufd, accel_dev->s1_hwpt->hwpt_id);
    }
    accel_dev->s1_hwpt = s1_hwpt;
    return true;
}

> +bool smmuv3_accel_install_nested_ste_range(SMMUv3State *s, SMMUSIDRange 
> *range,
> +                                           Error **errp)
> +{
> +    SMMUv3AccelState *s_accel = s->s_accel;
> +    SMMUv3AccelDevice *accel_dev;
> +
> +    if (!s_accel || !s_accel->vsmmu) {
> +        return true;
> +    }
> +
> +    QLIST_FOREACH(accel_dev, &s_accel->vsmmu->device_list, next) {
> +        uint32_t sid = smmu_get_sid(&accel_dev->sdev);
> +
> +        if (sid >= range->start && sid <= range->end) {
> +            if (!smmuv3_accel_install_nested_ste(s, &accel_dev->sdev,
> +                                                 sid, errp)) {
> +                return false;
> +            }
> +        }

This is a bit tricky..

I think CFGI_STE_RANGE shouldn't stop in the middle, if one of the
STEs fails. 

That being said, HW doesn't seem to propagate C_BAD_STE during a
CFGI_STE or CFGI_STE_RANGE, IIUIC. It reports C_BAD_STE event when
a transaction starts. If we want to perfectly mimic the hardware,
we'd have to set up a bad STE down to the HW, which will trigger a
C_BAD_STE vevent to be forwarded by vEVENTQ.

Nicolin

Reply via email to