> -----Original Message-----
> From: Eric Auger <[email protected]>
> Sent: 12 February 2026 17:38
> To: Shameer Kolothum Thodi <[email protected]>; qemu-
> [email protected]; [email protected]
> Cc: [email protected]; [email protected]; [email protected]; Nicolin
> Chen <[email protected]>; Nathan Chen <[email protected]>; Matt
> Ochs <[email protected]>; Jiandi An <[email protected]>; Jason Gunthorpe
> <[email protected]>; [email protected];
> [email protected]; [email protected]; Krishnakant Jaju
> <[email protected]>
> Subject: Re: [PATCH v2 07/24] hw/arm/smmuv3-accel: Wire CMDQV ops into
> accel lifecycle
> 
> External email: Use caution opening links or attachments
> 
> 
> Hi Shameer,
> 
> On 2/6/26 3:48 PM, Shameer Kolothum wrote:
> > Integrate CMDQV ops support into the accelerated SMMUv3 path. When
> CMDQV
> > is enabled, the backend ops are used to initialize CMDQV state and to
> > allocate the vIOMMU instance. The current implementation connects the
> > Tegra241 CMDQV backend, but does not enable functional support yet.
> >
> > No functional changes intended.
> >
> > Signed-off-by: Shameer Kolothum <[email protected]>
> > ---
> >  hw/arm/smmuv3-accel.c   | 65
> ++++++++++++++++++++++++++++++++++++-----
> >  hw/arm/smmuv3-accel.h   |  5 ++++
> >  hw/arm/smmuv3.c         |  1 +
> >  include/hw/arm/smmuv3.h |  2 ++
> >  4 files changed, 66 insertions(+), 7 deletions(-)
> >
> > diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c
> > index 9036b14601..b1a8ab79b5 100644
> > --- a/hw/arm/smmuv3-accel.c
> > +++ b/hw/arm/smmuv3-accel.c
> > @@ -18,6 +18,7 @@
> >
> >  #include "smmuv3-internal.h"
> >  #include "smmuv3-accel.h"
> > +#include "tegra241-cmdqv.h"
> >
> >  /*
> >   * The root region aliases the global system memory, and
> shared_as_sysmem
> > @@ -520,6 +521,7 @@ smmuv3_accel_alloc_viommu(SMMUv3State *s,
> HostIOMMUDeviceIOMMUFD *idev,
> >                            Error **errp)
> >  {
> >      SMMUv3AccelState *accel = s->s_accel;
> > +    const SMMUv3AccelCmdqvOps *cmdqv_ops = accel->cmdqv_ops;
> >      struct iommu_hwpt_arm_smmuv3 bypass_data = {
> >          .ste = { SMMU_STE_CFG_BYPASS | SMMU_STE_VALID, 0x0ULL },
> >      };
> > @@ -530,12 +532,24 @@ smmuv3_accel_alloc_viommu(SMMUv3State *s,
> HostIOMMUDeviceIOMMUFD *idev,
> >      uint32_t viommu_id, hwpt_id;
> >      IOMMUFDViommu *viommu;
> >
> > -    if (!iommufd_backend_alloc_viommu(idev->iommufd, idev->devid,
> > -                                      IOMMU_VIOMMU_TYPE_ARM_SMMUV3, 
> > s2_hwpt_id,
> > -                                      NULL, 0, &viommu_id, errp)) {
> > +    if (cmdqv_ops && (!cmdqv_ops->alloc_viommu || !cmdqv_ops-
> >alloc_veventq)) {
> > +        error_setg(errp, "CMDQV vIOMMU allocation not supported");
> >          return false;
> >      }
> >
> > +    if (cmdqv_ops) {
> > +        if (!cmdqv_ops->alloc_viommu(s, idev, &viommu_id, errp)) {
> > +            return false;
> > +        }
> > +    } else {
> > +        if (!iommufd_backend_alloc_viommu(idev->iommufd, idev->devid,
> > +                                          IOMMU_VIOMMU_TYPE_ARM_SMMUV3,
> > +                                          s2_hwpt_id, NULL, 0, &viommu_id,
> > +                                          errp)) {
> this is not add-on code but rather a switch. Depending on the cmdq type,

Yeah. This is not add-on but a switch.

> sizeof(cmdqv->cmdqv_data) varies. Wondering if ops does not makes sense
> here either.
> 
> > +            return false;
> > +        }
> > +    }
> > +
> >      viommu = g_new0(IOMMUFDViommu, 1);
> >      viommu->viommu_id = viommu_id;
> >      viommu->s2_hwpt_id = s2_hwpt_id;
> > @@ -565,13 +579,21 @@ smmuv3_accel_alloc_viommu(SMMUv3State *s,
> HostIOMMUDeviceIOMMUFD *idev,
> >          goto free_bypass_hwpt;
> >      }
> >
> > +    if (cmdqv_ops && !cmdqv_ops->alloc_veventq(s, errp)) {
> > +        goto free_veventq;
> > +    }
> > +
> >      /* Attach a HWPT based on SMMUv3 GBPA.ABORT value */
> >      hwpt_id = smmuv3_accel_gbpa_hwpt(s, accel);
> >      if (!host_iommu_device_iommufd_attach_hwpt(idev, hwpt_id, errp)) {
> > -        goto free_veventq;
> > +        goto free_cmdqv_veventq;
> >      }
> >      return true;
> >
> > +free_cmdqv_veventq:
> > +    if (cmdqv_ops && cmdqv_ops->free_veventq) {
> > +        cmdqv_ops->free_veventq(s);
> > +    }
> >  free_veventq:
> >      smmuv3_accel_free_veventq(accel);
> >  free_bypass_hwpt:
> > @@ -579,7 +601,11 @@ free_bypass_hwpt:
> >  free_abort_hwpt:
> >      iommufd_backend_free_id(idev->iommufd, accel->abort_hwpt_id);
> >  free_viommu:
> > -    iommufd_backend_free_id(idev->iommufd, viommu->viommu_id);
> > +    if (cmdqv_ops && cmdqv_ops->free_viommu) {
> > +        cmdqv_ops->free_viommu(s);
> > +    } else {
> > +        iommufd_backend_free_id(idev->iommufd, viommu->viommu_id);
> > +    }
> >      g_free(viommu);
> >      accel->viommu = NULL;
> >      return false;
> > @@ -865,8 +891,17 @@ bool
> smmuv3_accel_attach_gbpa_hwpt(SMMUv3State *s, Error **errp)
> >
> >  void smmuv3_accel_reset(SMMUv3State *s)
> >  {
> > -     /* Attach a HWPT based on GBPA reset value */
> > -     smmuv3_accel_attach_gbpa_hwpt(s, NULL);
> > +    SMMUv3AccelState *accel = s->s_accel;
> > +
> > +    if (!accel) {
> > +        return;
> > +    }
> > +    /* Attach a HWPT based on GBPA reset value */
> > +    smmuv3_accel_attach_gbpa_hwpt(s, NULL);
> > +
> > +    if (accel->cmdqv_ops && accel->cmdqv_ops->reset) {
> > +        accel->cmdqv_ops->reset(s);
> > +    }
> >  }
> >
> >  static void smmuv3_accel_as_init(SMMUv3State *s)
> > @@ -886,6 +921,22 @@ static void smmuv3_accel_as_init(SMMUv3State
> *s)
> >      address_space_init(shared_as_sysmem, &root, "smmuv3-accel-as-
> sysmem");
> >  }
> >
> > +bool smmuv3_accel_cmdqv_init(SMMUv3State *s, Error **errp)
> > +{
> > +    SMMUv3AccelState *accel = s->s_accel;
> > +
> > +    if (!accel || !s->tegra241_cmdqv) {
> isn't tegra241_cmdqv also only accelerated. Why do we need to have both
> checks?

True.  accel check is not required.

> > +        return true;
> > +    }
> > +
> > +    accel->cmdqv_ops = tegra241_cmdqv_ops();
> > +    if (!accel->cmdqv_ops || !accel->cmdqv_ops->init) {
> > +        error_setg(errp, "CMDQV support not available");
> > +        return false;
> > +    }
> > +    return accel->cmdqv_ops->init(s, errp);
> > +}
> > +
> >  void smmuv3_accel_init(SMMUv3State *s)
> >  {
> >      SMMUState *bs = ARM_SMMU(s);
> > diff --git a/hw/arm/smmuv3-accel.h b/hw/arm/smmuv3-accel.h
> > index ca087240e5..33da37bfc1 100644
> > --- a/hw/arm/smmuv3-accel.h
> > +++ b/hw/arm/smmuv3-accel.h
> > @@ -74,6 +74,7 @@ bool smmuv3_accel_issue_inv_cmd(SMMUv3State *s,
> void *cmd, SMMUDevice *sdev,
> >  void smmuv3_accel_idr_override(SMMUv3State *s);
> >  bool smmuv3_accel_alloc_veventq(SMMUv3State *s, Error **errp);
> >  void smmuv3_accel_reset(SMMUv3State *s);
> > +bool smmuv3_accel_cmdqv_init(SMMUv3State *s, Error **errp);
> >  #else
> >  static inline void smmuv3_accel_init(SMMUv3State *s)
> >  {
> > @@ -110,6 +111,10 @@ static inline bool
> smmuv3_accel_alloc_veventq(SMMUv3State *s, Error **errp)
> >  static inline void smmuv3_accel_reset(SMMUv3State *s)
> >  {
> >  }
> > +static bool smmuv3_accel_cmdqv_init(SMMUv3State *s, Error **errp)
> > +{
> > +    return true;
> > +}
> >  #endif
> >
> >  #endif /* HW_ARM_SMMUV3_ACCEL_H */
> > diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
> > index 148af80efd..7858bf2c33 100644
> > --- a/hw/arm/smmuv3.c
> > +++ b/hw/arm/smmuv3.c
> > @@ -2034,6 +2034,7 @@ static void smmu_realize(DeviceState *d, Error
> **errp)
> >
> >      smmu_init_irq(s, dev);
> >      smmuv3_init_id_regs(s);
> > +    smmuv3_accel_cmdqv_init(s, errp);
> >  }
> >
> >  static const VMStateDescription vmstate_smmuv3_queue = {
> > diff --git a/include/hw/arm/smmuv3.h b/include/hw/arm/smmuv3.h
> > index 26b2fc42fd..87926f8cb3 100644
> > --- a/include/hw/arm/smmuv3.h
> > +++ b/include/hw/arm/smmuv3.h
> > @@ -73,6 +73,8 @@ struct SMMUv3State {
> >      bool ats;
> >      uint8_t oas;
> >      uint8_t ssidsize;
> > +    /* Support for NVIDIA Tegra241 SMMU CMDQV extension */
> > +    bool tegra241_cmdqv;
> so I really advocate for auto detection and I think in that context I
> still think a selection of ops depending on the type of the cmdqv may be
> more future proof.
> Also to me this should be within struct SMMUv3AccelState if cmdqv only
> works with accel. Or does emulation of cmdqv make sense?

Let me see if I can move all cmdqv related stuff to SMMUv3AccelState or not.

Emulation makes no sense as the whole purpose of this extension is to accelerate
the performance.

Thanks,
Shameer

Reply via email to