Hi Shameer,
On 6/1/26 1:42 PM, Shameer Kolothum wrote:
> From: Nicolin Chen <[email protected]>
>
> Add support for allocating IOMMUFD hardware queues when the guest
> programs the VCMDQ BASE registers.
>
> VCMDQ_EN lives in VCMDQ_CONFIG, which is on the VINTF Page0 region
> that a later patch installs into guest MMIO — so QEMU won't trap its
> writes. Allocate the hardware queue instead once all of these are
> set: BASE programmed, CMDQ_ALLOC_MAP.ALLOC, and CMDQV / VINTF
> enabled. Each precondition write retries the allocation, so the
> guest may program them in any order.
>
> iommufd_backend_alloc_hw_queue() needs the guest physical address of
> the VCMDQ ring buffer, so allocation is deferred until the guest has
> populated BASE.
>
> If a hardware queue was previously allocated for the same VCMDQ,
> free it before reallocation. All allocated VCMDQs are freed when
> CMDQV or VINTF is disabled, or when the ALLOC bit is cleared.
>
> On allocation failure, set CMDQ_INIT_ERR and clear CMDQ_EN_OK in the
> cache so trapped guest reads see the failure rather than a queue
> that looks live. Clear them on a later successful allocation.
>
> Signed-off-by: Nicolin Chen <[email protected]>
> Co-developed-by: Shameer Kolothum <[email protected]>
> Signed-off-by: Shameer Kolothum <[email protected]>
> ---
> hw/arm/tegra241-cmdqv.h | 11 +++
> hw/arm/tegra241-cmdqv.c | 154 +++++++++++++++++++++++++++++++++++++---
> 2 files changed, 156 insertions(+), 9 deletions(-)
>
> diff --git a/hw/arm/tegra241-cmdqv.h b/hw/arm/tegra241-cmdqv.h
> index c4d327a9a5..84499b840d 100644
> --- a/hw/arm/tegra241-cmdqv.h
> +++ b/hw/arm/tegra241-cmdqv.h
> @@ -47,6 +47,7 @@ typedef struct Tegra241CMDQV {
> MemoryRegion mmio_cmdqv;
> qemu_irq irq;
> IOMMUFDVeventq *veventq;
> + IOMMUFDHWqueue *vcmdq[TEGRA241_CMDQV_MAX_CMDQ];
> void *vintf_page0;
>
> /* CMDQ-V Config page register cache */
> @@ -364,6 +365,16 @@ SMMU_CMDQV_VI_VCMDQi_BASE_H_(1)
> SMMU_CMDQV_VI_VCMDQi_CONS_INDX_BASE_DRAM_L_(1)
> SMMU_CMDQV_VI_VCMDQi_CONS_INDX_BASE_DRAM_H_(1)
>
> +static inline bool tegra241_cmdqv_enabled(Tegra241CMDQV *cmdqv)
> +{
> + return cmdqv->status & R_STATUS_CMDQV_ENABLED_MASK;
> +}
> +
> +static inline bool tegra241_vintf_enabled(Tegra241CMDQV *cmdqv)
> +{
> + return cmdqv->vintf_status & R_VINTF0_STATUS_ENABLE_OK_MASK;
> +}
> +
> const SMMUv3AccelCmdqvOps *tegra241_cmdqv_get_ops(void);
>
> #endif /* HW_ARM_TEGRA241_CMDQV_H */
> diff --git a/hw/arm/tegra241-cmdqv.c b/hw/arm/tegra241-cmdqv.c
> index a52e153501..54d95f4e4e 100644
> --- a/hw/arm/tegra241-cmdqv.c
> +++ b/hw/arm/tegra241-cmdqv.c
> @@ -18,6 +18,96 @@
> #include "tegra241-cmdqv.h"
> #include "trace.h"
>
> +static void tegra241_cmdqv_free_vcmdq(Tegra241CMDQV *cmdqv, int index)
> +{
> + IOMMUFDViommu *viommu = cmdqv->s_accel->viommu;
> + IOMMUFDHWqueue *vcmdq = cmdqv->vcmdq[index];
> +
> + if (!vcmdq) {
> + return;
> + }
> + iommufd_backend_free_id(viommu->iommufd, vcmdq->hw_queue_id);
> + g_free(vcmdq);
> + cmdqv->vcmdq[index] = NULL;
> +}
> +
> +static void tegra241_cmdqv_free_all_vcmdq(Tegra241CMDQV *cmdqv)
> +{
> + /* uapi/linux/iommufd.h: hw_queue destroy must be in descending @index.
> */
> + for (int i = (TEGRA241_CMDQV_MAX_CMDQ - 1); i >= 0; i--) {
> + tegra241_cmdqv_free_vcmdq(cmdqv, i);
> + }
basically the same comment as previous patch. I would add in the exit
phase reset at this stage too.
> +}
> +
> +/*
> + * Allocate a host HW VCMDQ from the current cached BASE / size for @index.
> + *
> + * Setup is a no-op (returns true) when any of the following preconditions
> + * isn't met yet:
> + * - BASE not prgrammed yet.
> + * - VCMDQ is not mapped to a VINTF (CMDQ_ALLOC_MAP.ALLOC == 0)
> + * - BASE / size don't resolve to a RAM region
> + * - CMDQV global enable or VINTF enable is not yet asserted
> + */
> +static bool tegra241_cmdqv_setup_vcmdq(Tegra241CMDQV *cmdqv, int index,
> + Error **errp)
> +{
> + SMMUv3AccelState *accel = cmdqv->s_accel;
> + uint64_t base_mask = (uint64_t)R_VCMDQ0_BASE_L_ADDR_MASK |
> + (uint64_t)R_VCMDQ0_BASE_H_ADDR_MASK << 32;
> + uint64_t addr = cmdqv->vcmdq_base[index] & base_mask;
> + uint64_t log2 = cmdqv->vcmdq_base[index] & R_VCMDQ0_BASE_L_LOG2SIZE_MASK;
> + uint64_t size = 1ULL << (log2 + 4);
> + IOMMUFDViommu *viommu = accel->viommu;
> + IOMMUFDHWqueue *hw_queue;
> + uint32_t hw_queue_id;
> +
> + /* BASE not programmed yet. */
> + if (!cmdqv->vcmdq_base[index]) {
> + return true;
> + }
> +
> + /* VCMDQ not yet mapped to a VINTF. */
> + if (!(cmdqv->cmdq_alloc_map[index] & R_CMDQ_ALLOC_MAP_0_ALLOC_MASK)) {
> + return true;
> + }
> +
> + if (!tegra241_cmdqv_enabled(cmdqv) || !tegra241_vintf_enabled(cmdqv)) {
> + return true;
> + }
nit: I think I would introduce an helper
vcmdq_hw_queue_ready_to_be_allocated(cmdqv, index)
where you could put a proper doc comment.
Not that once flag the base is set it returns true. ie. you do not check
that both L & H are set.
But anyway the queue will be reallocated
> +
> + tegra241_cmdqv_free_vcmdq(cmdqv, index);
> +
> + if (!iommufd_backend_alloc_hw_queue(viommu->iommufd, viommu->viommu_id,
> + IOMMU_HW_QUEUE_TYPE_TEGRA241_CMDQV,
> + index, addr, size, &hw_queue_id,
> + errp)) {
> + /* Record the failure in the cache. */
> + cmdqv->vcmdq_gerror[index] |= R_VCMDQ0_GERROR_CMDQ_INIT_ERR_MASK;
> + cmdqv->vcmdq_status[index] &= ~R_VCMDQ0_STATUS_CMDQ_EN_OK_MASK;
> + return false;
> + }
> + hw_queue = g_new(IOMMUFDHWqueue, 1);
> + hw_queue->hw_queue_id = hw_queue_id;
> + hw_queue->viommu = viommu;
> + cmdqv->vcmdq[index] = hw_queue;
> +
> + cmdqv->vcmdq_gerror[index] &= ~R_VCMDQ0_GERROR_CMDQ_INIT_ERR_MASK;
> + cmdqv->vcmdq_status[index] |= R_VCMDQ0_STATUS_CMDQ_EN_OK_MASK;
> +
> + return true;
> +}
> +
> +static void tegra241_cmdqv_setup_all_vcmdq(Tegra241CMDQV *cmdqv,
> + Error **errp)
> +{
> + for (int i = 0; i < TEGRA241_CMDQV_MAX_CMDQ; i++) {
> + if (!tegra241_cmdqv_setup_vcmdq(cmdqv, i, errp)) {
> + return;
> + }
> + }
nit: you you could after the tegra241_cmdqv_free_all_vcmdq helper
> +}
> +
> /*
> * Read a VCMDQ Page 0 register (control/status) using VCMDQ0_* offsets.
> *
> @@ -145,7 +235,12 @@ static void
> tegra241_cmdqv_write_vcmdq_page0(Tegra241CMDQV *cmdqv,
> break;
> case A_VCMDQ0_CONFIG:
> if (value & R_VCMDQ0_CONFIG_CMDQ_EN_MASK) {
> - cmdqv->vcmdq_status[index] |= R_VCMDQ0_STATUS_CMDQ_EN_OK_MASK;
> + /* Report init error if any. */
> + if (!(cmdqv->vcmdq_gerror[index] &
> + R_VCMDQ0_GERROR_CMDQ_INIT_ERR_MASK)) {
> + cmdqv->vcmdq_status[index] |=
> + R_VCMDQ0_STATUS_CMDQ_EN_OK_MASK;
> + }
does it really belong to this patch?
> } else {
> cmdqv->vcmdq_status[index] &= ~R_VCMDQ0_STATUS_CMDQ_EN_OK_MASK;
> }
> @@ -169,16 +264,19 @@ static void
> tegra241_cmdqv_write_vcmdq_page0(Tegra241CMDQV *cmdqv,
> */
> static void tegra241_cmdqv_write_vcmdq_page1(Tegra241CMDQV *cmdqv,
> hwaddr offset0, int index,
> - uint32_t value, bool direct)
> + uint32_t value, bool direct,
> + Error **errp)
> {
> switch (offset0) {
> case A_VCMDQ0_BASE_L:
> cmdqv->vcmdq_base[index] =
> deposit64(cmdqv->vcmdq_base[index], 0, 32, value);
> + tegra241_cmdqv_setup_vcmdq(cmdqv, index, errp);
> break;
> case A_VCMDQ0_BASE_H:
> cmdqv->vcmdq_base[index] =
> deposit64(cmdqv->vcmdq_base[index], 32, 32, value);
> + tegra241_cmdqv_setup_vcmdq(cmdqv, index, errp);
> break;
> case A_VCMDQ0_CONS_INDX_BASE_DRAM_L:
> cmdqv->vcmdq_cons_indx_base[index] =
> @@ -202,11 +300,13 @@ static void
> tegra241_cmdqv_write_vcmdq_page1(Tegra241CMDQV *cmdqv,
> */
> static void tegra241_cmdqv_write_vcmdq_page1_64(Tegra241CMDQV *cmdqv,
> hwaddr offset0, int index,
> - uint64_t value, bool direct)
> + uint64_t value, bool direct,
> + Error **errp)
> {
> switch (offset0) {
> case A_VCMDQ0_BASE_L:
> cmdqv->vcmdq_base[index] = value;
> + tegra241_cmdqv_setup_vcmdq(cmdqv, index, errp);
> break;
> case A_VCMDQ0_CONS_INDX_BASE_DRAM_L:
> cmdqv->vcmdq_cons_indx_base[index] = value;
> @@ -272,8 +372,14 @@ static void
> tegra241_cmdqv_config_vintf_write(Tegra241CMDQV *cmdqv,
> if (value & R_VINTF0_CONFIG_ENABLE_MASK) {
> if (tegra241_cmdqv_mmap_vintf_page0(cmdqv, errp)) {
> cmdqv->vintf_status |= R_VINTF0_STATUS_ENABLE_OK_MASK;
> + /*
> + * VCMDQs whose BASE was programmed before VINTF was
> + * enabled need their hw_queue allocated now.
> + */
> + tegra241_cmdqv_setup_all_vcmdq(cmdqv, errp);
> }
> } else {
> + tegra241_cmdqv_free_all_vcmdq(cmdqv);
> tegra241_cmdqv_munmap_vintf_page0(cmdqv, errp);
> cmdqv->vintf_status &= ~R_VINTF0_STATUS_ENABLE_OK_MASK;
> }
> @@ -388,16 +494,37 @@ static void tegra241_cmdqv_writel_mmio(Tegra241CMDQV
> *cmdqv, hwaddr offset,
> cmdqv->config = value;
> if (value & R_CONFIG_CMDQV_EN_MASK) {
> cmdqv->status |= R_STATUS_CMDQV_ENABLED_MASK;
> + /*
> + * VCMDQs whose BASE was programmed before CMDQV was enabled
> + * need their hw_queue allocated now.
> + */
> + tegra241_cmdqv_setup_all_vcmdq(cmdqv, &local_err);
> } else {
> + tegra241_cmdqv_free_all_vcmdq(cmdqv);
> cmdqv->status &= ~R_STATUS_CMDQV_ENABLED_MASK;
> }
> break;
> case A_VI_INT_MASK_0 ... A_VI_INT_MASK_1:
> cmdqv->vi_int_mask[(offset - A_VI_INT_MASK_0) / 4] = value;
> break;
> - case A_CMDQ_ALLOC_MAP_0 ... A_CMDQ_ALLOC_MAP_1:
> - cmdqv->cmdq_alloc_map[(offset - A_CMDQ_ALLOC_MAP_0) / 4] = value;
> + case A_CMDQ_ALLOC_MAP_0 ... A_CMDQ_ALLOC_MAP_1: {
> + int idx = (offset - A_CMDQ_ALLOC_MAP_0) / 4;
> + bool was_alloc = cmdqv->cmdq_alloc_map[idx] &
> + R_CMDQ_ALLOC_MAP_0_ALLOC_MASK;
> + bool now_alloc = value & R_CMDQ_ALLOC_MAP_0_ALLOC_MASK;
> +
> + cmdqv->cmdq_alloc_map[idx] = value;
> + /*
> + * If the VCMDQ was already programmed (BASE) before mapping, fire
> + * setup on the ALLOC 0->1 transition; tear down on 1->0.
> + */
> + if (!was_alloc && now_alloc) {
> + tegra241_cmdqv_setup_vcmdq(cmdqv, idx, &local_err);
> + } else if (was_alloc && !now_alloc) {
> + tegra241_cmdqv_free_vcmdq(cmdqv, idx);
> + }
> break;
> + }
> case A_VINTF0_CONFIG ... A_VINTF0_LVCMDQ_ERR_MAP_3:
> tegra241_cmdqv_config_vintf_write(cmdqv, offset, value, &local_err);
> break;
> @@ -427,12 +554,14 @@ static void tegra241_cmdqv_writel_mmio(Tegra241CMDQV
> *cmdqv, hwaddr offset,
> offset -= CMDQV_VINTF_PAGE1_BASE - CMDQV_VCMDQ_PAGE1_BASE;
> index = (offset - CMDQV_VCMDQ_PAGE1_BASE) / CMDQV_VCMDQ_STRIDE;
> tegra241_cmdqv_write_vcmdq_page1(cmdqv,
> - offset - index * CMDQV_VCMDQ_STRIDE, index, value, false);
> + offset - index * CMDQV_VCMDQ_STRIDE, index, value, false,
> + &local_err);
> break;
> case A_VCMDQ0_BASE_L ... A_VCMDQ1_CONS_INDX_BASE_DRAM_H:
> index = (offset - CMDQV_VCMDQ_PAGE1_BASE) / CMDQV_VCMDQ_STRIDE;
> tegra241_cmdqv_write_vcmdq_page1(cmdqv,
> - offset - index * CMDQV_VCMDQ_STRIDE, index, value, true);
> + offset - index * CMDQV_VCMDQ_STRIDE, index, value, true,
> + &local_err);
> break;
> default:
> qemu_log_mask(LOG_UNIMP, "%s unhandled write access at 0x%" PRIx64
> "\n",
> @@ -451,6 +580,7 @@ static void tegra241_cmdqv_writel_mmio(Tegra241CMDQV
> *cmdqv, hwaddr offset,
> static void tegra241_cmdqv_writell_mmio(Tegra241CMDQV *cmdqv, hwaddr offset,
> uint64_t value)
> {
> + Error *local_err = NULL;
> int index;
>
> switch (offset) {
> @@ -463,18 +593,24 @@ static void tegra241_cmdqv_writell_mmio(Tegra241CMDQV
> *cmdqv, hwaddr offset,
> offset -= CMDQV_VINTF_PAGE1_BASE - CMDQV_VCMDQ_PAGE1_BASE;
> index = (offset - CMDQV_VCMDQ_PAGE1_BASE) / CMDQV_VCMDQ_STRIDE;
> tegra241_cmdqv_write_vcmdq_page1_64(cmdqv,
> - offset - index * CMDQV_VCMDQ_STRIDE, index, value, false);
> + offset - index * CMDQV_VCMDQ_STRIDE, index, value, false,
> + &local_err);
> break;
> case A_VCMDQ0_BASE_L ... A_VCMDQ1_CONS_INDX_BASE_DRAM_H:
> index = (offset - CMDQV_VCMDQ_PAGE1_BASE) / CMDQV_VCMDQ_STRIDE;
> tegra241_cmdqv_write_vcmdq_page1_64(cmdqv,
> - offset - index * CMDQV_VCMDQ_STRIDE, index, value, true);
> + offset - index * CMDQV_VCMDQ_STRIDE, index, value, true,
> + &local_err);
> break;
> default:
> qemu_log_mask(LOG_UNIMP,
> "%s unhandled 64-bit write at 0x%" PRIx64 " (WI)\n",
> __func__, offset);
> }
> +
> + if (local_err) {
> + error_report_err(local_err);
> + }
> }
>
> static void tegra241_cmdqv_write_mmio(void *opaque, hwaddr offset,
Otherwise looks good to me
Thanks
Eric