Hi Shameer,

On 5/19/26 12:36 PM, Shameer Kolothum wrote:
> Introduce tegra241_cmdqv_vintf_ptr() to route VCMDQ Page0 register
> accesses through the mmap'd VINTF page0 backing once a hardware
> queue has been allocated.
>
> There are two QEMU trapped MMIO apertures for VCMDQ Page0 registers:
>
>   - Direct VCMDQ Page0 aperture (offset 0x10000)
>   - VINTF Page0 (offset 0x30000)
>
> These are hardware aliases: they address the same underlying
> registers. A subsequent patch maps the VINTF aperture as a
> guest-direct RAM region; in this patch both remain QEMU-trapped.
>
> VCMDQ Page0 accesses operate in one of two mutually exclusive modes,
> depending on whether a hardware queue (IOMMU_HW_QUEUE_ALLOC) has
> been allocated for the VCMDQ:
>
>   Pre-alloc:  vintf_ptr is NULL. Both apertures use QEMU's register
do you mean vintf_page0 instead of vintf_ptr.

vintf_page0 is set on mmap, right? and not on hwqueue_alloc

Besides pre/post-alloc sounds a bit confusing to me since it can mean
queue_alloc or mmap.
If I understand correctly vintf_page0 can be set while hwqueue_alloc has
not been done yet and conversely, hence the double check below:

+    if (!cmdqv->vcmdq[index] || !cmdqv->vintf_page0) {


>               cache. Hardware is not yet engaged.
>
>   Post-alloc: vintf_ptr is valid. Both QEMU trapped apertures access
>               registers directly via the mmap'd vintf_page0 pointer,
>               bypassing the cache. Hardware is the single source of
>               truth.
>
> The pre-to-post-alloc transition is triggered by the IOMMUFD hardware
so here you mean alloc is hw queue alloc
> queue allocation. The tegra241_cmdqv_sync_vcmdq() copies any pre-alloc
> cached writes (CONS_INDX, PROD_INDX, CONFIG, GERRORN) into the mmap'd
> page so the guest's view survives the transition.
>
> CMDQV acceleration only becomes active once the guest enables VINTF
> and the corresponding HW QUEUE is allocated through IOMMUFD. Until
> then, all VCMDQ accesses are served from the emulated register cache
> with no real hardware command processing. This matches the CMDQV
> hardware specification: if the logical CMDQ index does not map to any
> allocated Virtual CMDQ, "the access is dropped with no Fault/Interrupt".

I still does not realy understand what would be the problem of directly
mapping the direct cmdqv page0 too - I understand that theoretically the
direct view is larger than the VINTF one though -. I still think a
proper explanation is missed in this patch of 22/32

Assuming the guest tries to set up some unmapped vcmdqs in the direct
cmdqv page and this eventually ends up writing into the VINTF view, what
would happen and in which terms does it violate the spec?
>
> Signed-off-by: Shameer Kolothum <[email protected]>
> ---
>  hw/arm/tegra241-cmdqv.c | 72 +++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 70 insertions(+), 2 deletions(-)
>
> diff --git a/hw/arm/tegra241-cmdqv.c b/hw/arm/tegra241-cmdqv.c
> index f4968520f3..a7e89905df 100644
> --- a/hw/arm/tegra241-cmdqv.c
> +++ b/hw/arm/tegra241-cmdqv.c
> @@ -39,6 +39,39 @@ static void tegra241_cmdqv_free_all_vcmdq(Tegra241CMDQV 
> *cmdqv)
>      }
>  }
>  
> +/* Pointer into the mmap'd VINTF page0 slot for @index, or NULL. */
@index: index of the vcmdq
@offset0, offset within vcmdq page0
> +static inline uint32_t *tegra241_cmdqv_vintf_ptr(Tegra241CMDQV *cmdqv,
> +                                                 int index, hwaddr offset0)
> +{
> +    if (!cmdqv->vcmdq[index] || !cmdqv->vintf_page0) {
> +        return NULL;
> +    }
> +    return (uint32_t *)(cmdqv->vintf_page0 +
> +                        (index * CMDQV_VCMDQ_STRIDE) +
> +                        (offset0 - CMDQV_VCMDQ_PAGE0_BASE));
> +}
> +
> +/* Flush cached pre-alloc writes into the mmap'd VINTF page0 slot. */
> +static void tegra241_cmdqv_sync_vcmdq(Tegra241CMDQV *cmdqv, int index)
> +{
> +    uint32_t *ptr;
> +
> +    ptr = tegra241_cmdqv_vintf_ptr(cmdqv, index, A_VCMDQ0_CONS_INDX);
> +    if (!ptr) {
> +        return;
> +    }
> +    *ptr = cmdqv->vcmdq_cons_indx[index];
> +
> +    ptr = tegra241_cmdqv_vintf_ptr(cmdqv, index, A_VCMDQ0_PROD_INDX);
> +    *ptr = cmdqv->vcmdq_prod_indx[index];
> +
> +    ptr = tegra241_cmdqv_vintf_ptr(cmdqv, index, A_VCMDQ0_CONFIG);
> +    *ptr = cmdqv->vcmdq_config[index];
> +
> +    ptr = tegra241_cmdqv_vintf_ptr(cmdqv, index, A_VCMDQ0_GERRORN);
> +    *ptr = cmdqv->vcmdq_gerrorn[index];
> +}
> +
>  /*
>   * Allocate a host HW VCMDQ from the current cached BASE / size for @index.
>   *
> @@ -94,6 +127,9 @@ static bool tegra241_cmdqv_setup_vcmdq(Tegra241CMDQV 
> *cmdqv, int index,
>      hw_queue->viommu = viommu;
>      cmdqv->vcmdq[index] = hw_queue;
>  
> +    /* Pre-alloc cached writes survive the cache-to-hardware transition. */
> +    tegra241_cmdqv_sync_vcmdq(cmdqv, index);
> +
>      return true;
>  }
>  
> @@ -108,11 +144,14 @@ static void 
> tegra241_cmdqv_setup_all_vcmdq(Tegra241CMDQV *cmdqv,
>  }
>  
>  /*
> - * Returns true if the per-VCMDQ CMDQ_EN_OK bit is set.
> + * Returns true if the per-VCMDQ CMDQ_EN_OK bit is set. Reads from the
> + * mmap'd VINTF page0 when the VCMDQ is allocated (HW is the source of
> + * truth post-alloc); otherwise reads from the register cache.
>   */
>  static bool tegra241_vcmdq_enabled(Tegra241CMDQV *cmdqv, int index)
>  {
> -    uint32_t status = cmdqv->vcmdq_status[index];
> +    uint32_t *ptr = tegra241_cmdqv_vintf_ptr(cmdqv, index, A_VCMDQ0_STATUS);
> +    uint32_t status = ptr ? *ptr : cmdqv->vcmdq_status[index];
>  
>      return status & R_VCMDQ0_STATUS_CMDQ_EN_OK_MASK;
>  }
> @@ -122,12 +161,21 @@ static bool tegra241_vcmdq_enabled(Tegra241CMDQV 
> *cmdqv, int index)
>   *
>   * The caller normalizes the MMIO offset such that @offset0 always refers
>   * to a VCMDQ0_* register, while @index selects the VCMDQ instance.
> + *
> + * If the VCMDQ is allocated and VINTF page0 is mmap'd, read directly
> + * from the VINTF page0 backing. Otherwise, fall back to the cache.
>   */
>  static uint64_t tegra241_cmdqv_read_vcmdq_page0(Tegra241CMDQV *cmdqv,
>                                                  hwaddr offset0, int index)
>  {
> +    uint32_t *ptr = tegra241_cmdqv_vintf_ptr(cmdqv, index, offset0);
>      uint64_t val = 0;
>  
> +    if (ptr) {
> +        val = *ptr;
> +        goto out;
> +    }
> +
>      switch (offset0) {
>      case A_VCMDQ0_CONS_INDX:
>          val = cmdqv->vcmdq_cons_indx[index];
> @@ -152,6 +200,7 @@ static uint64_t 
> tegra241_cmdqv_read_vcmdq_page0(Tegra241CMDQV *cmdqv,
>                        "%s unhandled read access at 0x%" PRIx64 "\n",
>                        __func__, offset0);
>      }
> +out:
>      trace_tegra241_cmdqv_read_vcmdq_page0(index, offset0, val);
>      return val;
>  }
> @@ -225,11 +274,29 @@ static uint64_t 
> tegra241_cmdqv_config_vintf_read(Tegra241CMDQV *cmdqv,
>   *
>   * Page 0 registers are all 32-bit; this helper is only called for 4-byte
>   * writes.
> + *
> + * If the VCMDQ is allocated and VINTF page0 is mmap'd, write directly
> + * to the VINTF page0 backing. Otherwise, update the cache.
>   */
>  static void tegra241_cmdqv_write_vcmdq_page0(Tegra241CMDQV *cmdqv,
>                                               hwaddr offset0, int index,
>                                               uint32_t value)
>  {
> +    uint32_t *ptr = tegra241_cmdqv_vintf_ptr(cmdqv, index, offset0);
> +
> +    if (ptr) {
> +        switch (offset0) {
> +        case A_VCMDQ0_CONS_INDX:
> +        case A_VCMDQ0_PROD_INDX:
> +        case A_VCMDQ0_CONFIG:
> +        case A_VCMDQ0_GERRORN:
> +            *ptr = value;
> +            goto out;
> +        default:
> +            break;
> +        }
> +    }
> +
>      switch (offset0) {
>      case A_VCMDQ0_CONS_INDX:
>          cmdqv->vcmdq_cons_indx[index] = value;
> @@ -253,6 +320,7 @@ static void 
> tegra241_cmdqv_write_vcmdq_page0(Tegra241CMDQV *cmdqv,
>                        "%s unhandled write access at 0x%" PRIx64 "\n",
>                        __func__, offset0);
>      }
> +out:
>      trace_tegra241_cmdqv_write_vcmdq_page0(index, offset0, value);
>  }
>  
Thanks

Eric


Reply via email to