Hi Shameer, On 6/1/26 1:42 PM, Shameer Kolothum wrote: > hw/arm/tegra241-cmdqv: Use mmap'd VINTF Page 0 as VCMDQ backing the title is double here. By the way may I suggest hw/arm/tegra241-cmdqv: Use mmap'd VINTF page0 for mapped vcmdqs direct view
to emphasize this relates the direct view and only for mapped vcmdqs > > Introduce tegra241_cmdqv_vintf_ptr() to route VCMDQ Page 0 register > accesses through the mmap'd VINTF Page 0 backing once a hardware > queue has been allocated for the VCMDQ. > > The two QEMU-trapped Page 0 apertures (direct at 0x10000, VINTF at > 0x30000) are hardware aliases of the same underlying registers. A > subsequent patch installs the VINTF aperture as a RAM-device into > guest MMIO; in this patch both remain QEMU-trapped. > > The direct VCMDQ aperture stays QEMU-trapped (rather than aliased > to the VINTF mmap) so that writes to an unallocated VCMDQ remain > well-defined. The CMDQV architecture allows software to program a > VCMDQ through the direct aperture without first allocating it to a > VINTF; aliasing to the VINTF mmap would route those writes into > unallocated logical slots where the hardware silently drops them. > > A VCMDQ Page 0 access is served from one of two sources: > > - Cache-backed: no hw_queue is allocated for the VCMDQ > (HW_QUEUE_ALLOC has not yet succeeded). Both apertures use > QEMU's register cache. > > - HW-backed: HW_QUEUE_ALLOC has succeeded. Both apertures access > the registers directly through the mmap'd VINTF Page 0. > > tegra241_cmdqv_sync_vcmdq() copies any cached writes (CONS_INDX, > PROD_INDX, CONFIG, GERRORN) into the mmap'd page on the cache-to-HW > transition so the guest's earlier register state survives. > > Signed-off-by: Shameer Kolothum <[email protected]> > --- > hw/arm/tegra241-cmdqv.c | 73 +++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 73 insertions(+) > > diff --git a/hw/arm/tegra241-cmdqv.c b/hw/arm/tegra241-cmdqv.c > index 54d95f4e4e..dce7089697 100644 > --- a/hw/arm/tegra241-cmdqv.c > +++ b/hw/arm/tegra241-cmdqv.c > @@ -39,6 +39,47 @@ static void tegra241_cmdqv_free_all_vcmdq(Tegra241CMDQV > *cmdqv) > } > } > > +/* > + * Return a pointer into the mmap'd VINTF page0 for the VCMDQ Page 0 > + * register at @offset0 in VCMDQ slot @index, or NULL when the VCMDQ > + * has no hw_queue allocated or the VINTF page0 is not mmap'd. > + */ > +static inline uint32_t *tegra241_cmdqv_vintf_ptr(Tegra241CMDQV *cmdqv, > + int index, hwaddr offset0) > +{ > + if (!cmdqv->vcmdq[index] || !cmdqv->vintf_page0) { > + return NULL; > + } > + return (uint32_t *)(cmdqv->vintf_page0 + > + (index * CMDQV_VCMDQ_STRIDE) + > + (offset0 - CMDQV_VCMDQ_PAGE0_BASE)); > +} > + > +/* > + * Flush cached register writes into the mmap'd VINTF page0 after a > + * successful HW_QUEUE_ALLOC, so the guest's earlier writes survive > + * the cache-to-hardware transition. > + */ > +static void tegra241_cmdqv_sync_vcmdq(Tegra241CMDQV *cmdqv, int index) > +{ > + uint32_t *ptr; > + > + ptr = tegra241_cmdqv_vintf_ptr(cmdqv, index, A_VCMDQ0_CONS_INDX); > + if (!ptr) { > + return; > + } > + *ptr = cmdqv->vcmdq_cons_indx[index]; > + > + ptr = tegra241_cmdqv_vintf_ptr(cmdqv, index, A_VCMDQ0_PROD_INDX); > + *ptr = cmdqv->vcmdq_prod_indx[index]; > + > + ptr = tegra241_cmdqv_vintf_ptr(cmdqv, index, A_VCMDQ0_CONFIG); > + *ptr = cmdqv->vcmdq_config[index]; > + > + ptr = tegra241_cmdqv_vintf_ptr(cmdqv, index, A_VCMDQ0_GERRORN); > + *ptr = cmdqv->vcmdq_gerrorn[index]; > +} > + > /* > * Allocate a host HW VCMDQ from the current cached BASE / size for @index. > * > @@ -95,6 +136,9 @@ static bool tegra241_cmdqv_setup_vcmdq(Tegra241CMDQV > *cmdqv, int index, > cmdqv->vcmdq_gerror[index] &= ~R_VCMDQ0_GERROR_CMDQ_INIT_ERR_MASK; > cmdqv->vcmdq_status[index] |= R_VCMDQ0_STATUS_CMDQ_EN_OK_MASK; > > + /* Pre-alloc cached writes survive the cache-to-hardware transition. */ what about the opposite transition? > + tegra241_cmdqv_sync_vcmdq(cmdqv, index); > + > return true; > } > > @@ -113,13 +157,22 @@ static void > tegra241_cmdqv_setup_all_vcmdq(Tegra241CMDQV *cmdqv, > * > * The caller normalizes the MMIO offset such that @offset0 always refers > * to a VCMDQ0_* register, while @index selects the VCMDQ instance. > + * > + * If the VCMDQ is allocated and VINTF page0 is mmap'd, read directly > + * from the VINTF page0 backing. Otherwise, fall back to the cache. > */ > static uint64_t tegra241_cmdqv_read_vcmdq_page0(Tegra241CMDQV *cmdqv, > hwaddr offset0, int index, > bool direct) > { > + uint32_t *ptr = tegra241_cmdqv_vintf_ptr(cmdqv, index, offset0); > uint64_t val = 0; > > + if (ptr) { shouldn't you test "direct" as well? I know that later on this is will be backed by another mr but I think this would be clearer. > + val = *ptr; > + goto out; > + } > + > switch (offset0) { > case A_VCMDQ0_CONS_INDX: > val = cmdqv->vcmdq_cons_indx[index]; > @@ -144,6 +197,7 @@ static uint64_t > tegra241_cmdqv_read_vcmdq_page0(Tegra241CMDQV *cmdqv, > "%s unhandled read access at 0x%" PRIx64 "\n", > __func__, offset0); > } > +out: > trace_tegra241_cmdqv_read_vcmdq_page0(index, direct ? "direct" : "vi", > offset0, val); > return val; > @@ -220,11 +274,29 @@ static uint64_t > tegra241_cmdqv_config_vintf_read(Tegra241CMDQV *cmdqv, > * > * Page 0 registers are all 32-bit; this helper is only called for 4-byte > * writes. > + * > + * If the VCMDQ is allocated and VINTF page0 is mmap'd, write directly > + * to the VINTF page0 backing. Otherwise, update the cache. > */ > static void tegra241_cmdqv_write_vcmdq_page0(Tegra241CMDQV *cmdqv, > hwaddr offset0, int index, > uint32_t value, bool direct) > { > + uint32_t *ptr = tegra241_cmdqv_vintf_ptr(cmdqv, index, offset0); same here? > + > + if (ptr) { > + switch (offset0) { > + case A_VCMDQ0_CONS_INDX: > + case A_VCMDQ0_PROD_INDX: > + case A_VCMDQ0_CONFIG: > + case A_VCMDQ0_GERRORN: > + *ptr = value; > + goto out; > + default: > + break; > + } > + } > + > switch (offset0) { > case A_VCMDQ0_CONS_INDX: > cmdqv->vcmdq_cons_indx[index] = value; > @@ -255,6 +327,7 @@ static void > tegra241_cmdqv_write_vcmdq_page0(Tegra241CMDQV *cmdqv, > "%s unhandled write access at 0x%" PRIx64 "\n", > __func__, offset0); > } > +out: > trace_tegra241_cmdqv_write_vcmdq_page0(index, direct ? "direct" : "vi", by the way could be interesting to log in the trace event that mmapped was used instead of cache regs. besides this looks good to me Thanks Eric > offset0, value); > }
