> -----Original Message-----
> From: Eric Auger <[email protected]>
> Sent: 12 February 2026 17:38
> To: Shameer Kolothum Thodi <[email protected]>; qemu-
> [email protected]; [email protected]
> Cc: [email protected]; [email protected]; [email protected]; Nicolin
> Chen <[email protected]>; Nathan Chen <[email protected]>; Matt
> Ochs <[email protected]>; Jiandi An <[email protected]>; Jason Gunthorpe
> <[email protected]>; [email protected];
> [email protected]; [email protected]; Krishnakant Jaju
> <[email protected]>
> Subject: Re: [PATCH v2 07/24] hw/arm/smmuv3-accel: Wire CMDQV ops into
> accel lifecycle
>
> External email: Use caution opening links or attachments
>
>
> Hi Shameer,
>
> On 2/6/26 3:48 PM, Shameer Kolothum wrote:
> > Integrate CMDQV ops support into the accelerated SMMUv3 path. When
> CMDQV
> > is enabled, the backend ops are used to initialize CMDQV state and to
> > allocate the vIOMMU instance. The current implementation connects the
> > Tegra241 CMDQV backend, but does not enable functional support yet.
> >
> > No functional changes intended.
> >
> > Signed-off-by: Shameer Kolothum <[email protected]>
> > ---
> > hw/arm/smmuv3-accel.c | 65
> ++++++++++++++++++++++++++++++++++++-----
> > hw/arm/smmuv3-accel.h | 5 ++++
> > hw/arm/smmuv3.c | 1 +
> > include/hw/arm/smmuv3.h | 2 ++
> > 4 files changed, 66 insertions(+), 7 deletions(-)
> >
> > diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c
> > index 9036b14601..b1a8ab79b5 100644
> > --- a/hw/arm/smmuv3-accel.c
> > +++ b/hw/arm/smmuv3-accel.c
> > @@ -18,6 +18,7 @@
> >
> > #include "smmuv3-internal.h"
> > #include "smmuv3-accel.h"
> > +#include "tegra241-cmdqv.h"
> >
> > /*
> > * The root region aliases the global system memory, and
> shared_as_sysmem
> > @@ -520,6 +521,7 @@ smmuv3_accel_alloc_viommu(SMMUv3State *s,
> HostIOMMUDeviceIOMMUFD *idev,
> > Error **errp)
> > {
> > SMMUv3AccelState *accel = s->s_accel;
> > + const SMMUv3AccelCmdqvOps *cmdqv_ops = accel->cmdqv_ops;
> > struct iommu_hwpt_arm_smmuv3 bypass_data = {
> > .ste = { SMMU_STE_CFG_BYPASS | SMMU_STE_VALID, 0x0ULL },
> > };
> > @@ -530,12 +532,24 @@ smmuv3_accel_alloc_viommu(SMMUv3State *s,
> HostIOMMUDeviceIOMMUFD *idev,
> > uint32_t viommu_id, hwpt_id;
> > IOMMUFDViommu *viommu;
> >
> > - if (!iommufd_backend_alloc_viommu(idev->iommufd, idev->devid,
> > - IOMMU_VIOMMU_TYPE_ARM_SMMUV3,
> > s2_hwpt_id,
> > - NULL, 0, &viommu_id, errp)) {
> > + if (cmdqv_ops && (!cmdqv_ops->alloc_viommu || !cmdqv_ops-
> >alloc_veventq)) {
> > + error_setg(errp, "CMDQV vIOMMU allocation not supported");
> > return false;
> > }
> >
> > + if (cmdqv_ops) {
> > + if (!cmdqv_ops->alloc_viommu(s, idev, &viommu_id, errp)) {
> > + return false;
> > + }
> > + } else {
> > + if (!iommufd_backend_alloc_viommu(idev->iommufd, idev->devid,
> > + IOMMU_VIOMMU_TYPE_ARM_SMMUV3,
> > + s2_hwpt_id, NULL, 0, &viommu_id,
> > + errp)) {
> this is not add-on code but rather a switch. Depending on the cmdq type,
Yeah. This is not add-on but a switch.
> sizeof(cmdqv->cmdqv_data) varies. Wondering if ops does not makes sense
> here either.
>
> > + return false;
> > + }
> > + }
> > +
> > viommu = g_new0(IOMMUFDViommu, 1);
> > viommu->viommu_id = viommu_id;
> > viommu->s2_hwpt_id = s2_hwpt_id;
> > @@ -565,13 +579,21 @@ smmuv3_accel_alloc_viommu(SMMUv3State *s,
> HostIOMMUDeviceIOMMUFD *idev,
> > goto free_bypass_hwpt;
> > }
> >
> > + if (cmdqv_ops && !cmdqv_ops->alloc_veventq(s, errp)) {
> > + goto free_veventq;
> > + }
> > +
> > /* Attach a HWPT based on SMMUv3 GBPA.ABORT value */
> > hwpt_id = smmuv3_accel_gbpa_hwpt(s, accel);
> > if (!host_iommu_device_iommufd_attach_hwpt(idev, hwpt_id, errp)) {
> > - goto free_veventq;
> > + goto free_cmdqv_veventq;
> > }
> > return true;
> >
> > +free_cmdqv_veventq:
> > + if (cmdqv_ops && cmdqv_ops->free_veventq) {
> > + cmdqv_ops->free_veventq(s);
> > + }
> > free_veventq:
> > smmuv3_accel_free_veventq(accel);
> > free_bypass_hwpt:
> > @@ -579,7 +601,11 @@ free_bypass_hwpt:
> > free_abort_hwpt:
> > iommufd_backend_free_id(idev->iommufd, accel->abort_hwpt_id);
> > free_viommu:
> > - iommufd_backend_free_id(idev->iommufd, viommu->viommu_id);
> > + if (cmdqv_ops && cmdqv_ops->free_viommu) {
> > + cmdqv_ops->free_viommu(s);
> > + } else {
> > + iommufd_backend_free_id(idev->iommufd, viommu->viommu_id);
> > + }
> > g_free(viommu);
> > accel->viommu = NULL;
> > return false;
> > @@ -865,8 +891,17 @@ bool
> smmuv3_accel_attach_gbpa_hwpt(SMMUv3State *s, Error **errp)
> >
> > void smmuv3_accel_reset(SMMUv3State *s)
> > {
> > - /* Attach a HWPT based on GBPA reset value */
> > - smmuv3_accel_attach_gbpa_hwpt(s, NULL);
> > + SMMUv3AccelState *accel = s->s_accel;
> > +
> > + if (!accel) {
> > + return;
> > + }
> > + /* Attach a HWPT based on GBPA reset value */
> > + smmuv3_accel_attach_gbpa_hwpt(s, NULL);
> > +
> > + if (accel->cmdqv_ops && accel->cmdqv_ops->reset) {
> > + accel->cmdqv_ops->reset(s);
> > + }
> > }
> >
> > static void smmuv3_accel_as_init(SMMUv3State *s)
> > @@ -886,6 +921,22 @@ static void smmuv3_accel_as_init(SMMUv3State
> *s)
> > address_space_init(shared_as_sysmem, &root, "smmuv3-accel-as-
> sysmem");
> > }
> >
> > +bool smmuv3_accel_cmdqv_init(SMMUv3State *s, Error **errp)
> > +{
> > + SMMUv3AccelState *accel = s->s_accel;
> > +
> > + if (!accel || !s->tegra241_cmdqv) {
> isn't tegra241_cmdqv also only accelerated. Why do we need to have both
> checks?
True. accel check is not required.
> > + return true;
> > + }
> > +
> > + accel->cmdqv_ops = tegra241_cmdqv_ops();
> > + if (!accel->cmdqv_ops || !accel->cmdqv_ops->init) {
> > + error_setg(errp, "CMDQV support not available");
> > + return false;
> > + }
> > + return accel->cmdqv_ops->init(s, errp);
> > +}
> > +
> > void smmuv3_accel_init(SMMUv3State *s)
> > {
> > SMMUState *bs = ARM_SMMU(s);
> > diff --git a/hw/arm/smmuv3-accel.h b/hw/arm/smmuv3-accel.h
> > index ca087240e5..33da37bfc1 100644
> > --- a/hw/arm/smmuv3-accel.h
> > +++ b/hw/arm/smmuv3-accel.h
> > @@ -74,6 +74,7 @@ bool smmuv3_accel_issue_inv_cmd(SMMUv3State *s,
> void *cmd, SMMUDevice *sdev,
> > void smmuv3_accel_idr_override(SMMUv3State *s);
> > bool smmuv3_accel_alloc_veventq(SMMUv3State *s, Error **errp);
> > void smmuv3_accel_reset(SMMUv3State *s);
> > +bool smmuv3_accel_cmdqv_init(SMMUv3State *s, Error **errp);
> > #else
> > static inline void smmuv3_accel_init(SMMUv3State *s)
> > {
> > @@ -110,6 +111,10 @@ static inline bool
> smmuv3_accel_alloc_veventq(SMMUv3State *s, Error **errp)
> > static inline void smmuv3_accel_reset(SMMUv3State *s)
> > {
> > }
> > +static bool smmuv3_accel_cmdqv_init(SMMUv3State *s, Error **errp)
> > +{
> > + return true;
> > +}
> > #endif
> >
> > #endif /* HW_ARM_SMMUV3_ACCEL_H */
> > diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
> > index 148af80efd..7858bf2c33 100644
> > --- a/hw/arm/smmuv3.c
> > +++ b/hw/arm/smmuv3.c
> > @@ -2034,6 +2034,7 @@ static void smmu_realize(DeviceState *d, Error
> **errp)
> >
> > smmu_init_irq(s, dev);
> > smmuv3_init_id_regs(s);
> > + smmuv3_accel_cmdqv_init(s, errp);
> > }
> >
> > static const VMStateDescription vmstate_smmuv3_queue = {
> > diff --git a/include/hw/arm/smmuv3.h b/include/hw/arm/smmuv3.h
> > index 26b2fc42fd..87926f8cb3 100644
> > --- a/include/hw/arm/smmuv3.h
> > +++ b/include/hw/arm/smmuv3.h
> > @@ -73,6 +73,8 @@ struct SMMUv3State {
> > bool ats;
> > uint8_t oas;
> > uint8_t ssidsize;
> > + /* Support for NVIDIA Tegra241 SMMU CMDQV extension */
> > + bool tegra241_cmdqv;
> so I really advocate for auto detection and I think in that context I
> still think a selection of ops depending on the type of the cmdqv may be
> more future proof.
> Also to me this should be within struct SMMUv3AccelState if cmdqv only
> works with accel. Or does emulation of cmdqv make sense?
Let me see if I can move all cmdqv related stuff to SMMUv3AccelState or not.
Emulation makes no sense as the whole purpose of this extension is to accelerate
the performance.
Thanks,
Shameer