On 6/4/26 1:22 PM, Shameer Kolothum Thodi wrote:
>
>> -----Original Message-----
>> From: Eric Auger <[email protected]>
>> Sent: 04 June 2026 10:56
>> To: Shameer Kolothum Thodi <[email protected]>; qemu-
>> [email protected]; [email protected]
>> Cc: [email protected]; [email protected]; [email protected]; Nicolin
>> Chen <[email protected]>; Nathan Chen <[email protected]>; Matt
>> Ochs <[email protected]>; Jiandi An <[email protected]>; Jason Gunthorpe
>> <[email protected]>; [email protected]; Krishnakant Jaju
>> <[email protected]>; [email protected]
>> Subject: Re: [PATCH v6 19/31] hw/arm/tegra241-cmdqv: Use mmap'd VINTF
>> page0 as VCMDQ backing
>>
>> External email: Use caution opening links or attachments
>>
>>
>> Hi Shameer,
>>
>> On 6/1/26 1:42 PM, Shameer Kolothum wrote:
>>> hw/arm/tegra241-cmdqv: Use mmap'd VINTF Page 0 as VCMDQ backing
>> the title is double here.
> Ah..Ok.
>
>> By the way may I suggest
>> hw/arm/tegra241-cmdqv: Use mmap'd VINTF page0 for mapped vcmdqs
>> direct view
>>
>> to emphasize this relates the direct view and only for mapped vcmdqs
> It's true this eventually relates only to the direct aperture, but not yet.
> VINTF page0 only gets mapped into the guest in patch 21. Until then,
> for allocated vCMDQs we use the mmap'd VINTF page0 for both direct
> and VINTF aperture accesses.
>
> May be we can rename to:
> Route allocated VCMDQ Page0 accesses to the mmap'd VINTF page0
Indeed. Yes above makes sense. It is about allocated VCMDQs only
>
>>> Introduce tegra241_cmdqv_vintf_ptr() to route VCMDQ Page 0 register
>>> accesses through the mmap'd VINTF Page 0 backing once a hardware
>>> queue has been allocated for the VCMDQ.
>>>
>>> The two QEMU-trapped Page 0 apertures (direct at 0x10000, VINTF at
>>> 0x30000) are hardware aliases of the same underlying registers. A
>>> subsequent patch installs the VINTF aperture as a RAM-device into
>>> guest MMIO; in this patch both remain QEMU-trapped.
>>>
>>> The direct VCMDQ aperture stays QEMU-trapped (rather than aliased
>>> to the VINTF mmap) so that writes to an unallocated VCMDQ remain
>>> well-defined. The CMDQV architecture allows software to program a
>>> VCMDQ through the direct aperture without first allocating it to a
>>> VINTF; aliasing to the VINTF mmap would route those writes into
>>> unallocated logical slots where the hardware silently drops them.
>>>
>>> A VCMDQ Page 0 access is served from one of two sources:
>>>
>>> - Cache-backed: no hw_queue is allocated for the VCMDQ
>>> (HW_QUEUE_ALLOC has not yet succeeded). Both apertures use
>>> QEMU's register cache.
>>>
>>> - HW-backed: HW_QUEUE_ALLOC has succeeded. Both apertures access
>>> the registers directly through the mmap'd VINTF Page 0.
>>>
>>> tegra241_cmdqv_sync_vcmdq() copies any cached writes (CONS_INDX,
>>> PROD_INDX, CONFIG, GERRORN) into the mmap'd page on the cache-to-HW
>>> transition so the guest's earlier register state survives.
>>>
>>> Signed-off-by: Shameer Kolothum <[email protected]>
>>> ---
>>> hw/arm/tegra241-cmdqv.c | 73
>> +++++++++++++++++++++++++++++++++++++++++
>>> 1 file changed, 73 insertions(+)
>>>
>>> diff --git a/hw/arm/tegra241-cmdqv.c b/hw/arm/tegra241-cmdqv.c
>>> index 54d95f4e4e..dce7089697 100644
>>> --- a/hw/arm/tegra241-cmdqv.c
>>> +++ b/hw/arm/tegra241-cmdqv.c
>>> @@ -39,6 +39,47 @@ static void
>> tegra241_cmdqv_free_all_vcmdq(Tegra241CMDQV *cmdqv)
>>> }
>>> }
>>>
>>> +/*
>>> + * Return a pointer into the mmap'd VINTF page0 for the VCMDQ Page 0
>>> + * register at @offset0 in VCMDQ slot @index, or NULL when the VCMDQ
>>> + * has no hw_queue allocated or the VINTF page0 is not mmap'd.
>>> + */
>>> +static inline uint32_t *tegra241_cmdqv_vintf_ptr(Tegra241CMDQV
>> *cmdqv,
>>> + int index, hwaddr offset0)
>>> +{
>>> + if (!cmdqv->vcmdq[index] || !cmdqv->vintf_page0) {
>>> + return NULL;
>>> + }
>>> + return (uint32_t *)(cmdqv->vintf_page0 +
>>> + (index * CMDQV_VCMDQ_STRIDE) +
>>> + (offset0 - CMDQV_VCMDQ_PAGE0_BASE));
>>> +}
>>> +
>>> +/*
>>> + * Flush cached register writes into the mmap'd VINTF page0 after a
>>> + * successful HW_QUEUE_ALLOC, so the guest's earlier writes survive
>>> + * the cache-to-hardware transition.
>>> + */
>>> +static void tegra241_cmdqv_sync_vcmdq(Tegra241CMDQV *cmdqv, int
>> index)
>>> +{
>>> + uint32_t *ptr;
>>> +
>>> + ptr = tegra241_cmdqv_vintf_ptr(cmdqv, index,
>> A_VCMDQ0_CONS_INDX);
>>> + if (!ptr) {
>>> + return;
>>> + }
>>> + *ptr = cmdqv->vcmdq_cons_indx[index];
>>> +
>>> + ptr = tegra241_cmdqv_vintf_ptr(cmdqv, index,
>> A_VCMDQ0_PROD_INDX);
>>> + *ptr = cmdqv->vcmdq_prod_indx[index];
>>> +
>>> + ptr = tegra241_cmdqv_vintf_ptr(cmdqv, index, A_VCMDQ0_CONFIG);
>>> + *ptr = cmdqv->vcmdq_config[index];
>>> +
>>> + ptr = tegra241_cmdqv_vintf_ptr(cmdqv, index, A_VCMDQ0_GERRORN);
>>> + *ptr = cmdqv->vcmdq_gerrorn[index];
>>> +}
>>> +
>>> /*
>>> * Allocate a host HW VCMDQ from the current cached BASE / size for
>> @index.
>>> *
>>> @@ -95,6 +136,9 @@ static bool
>> tegra241_cmdqv_setup_vcmdq(Tegra241CMDQV *cmdqv, int index,
>>> cmdqv->vcmdq_gerror[index] &=
>> ~R_VCMDQ0_GERROR_CMDQ_INIT_ERR_MASK;
>>> cmdqv->vcmdq_status[index] |=
>> R_VCMDQ0_STATUS_CMDQ_EN_OK_MASK;
>>> + /* Pre-alloc cached writes survive the cache-to-hardware transition. */
>> what about the opposite transition?
> Yeah. We don’t do anything now. I think better to reset the cache on
> opposite transition.
OK. I would add this explanation.
>
>>> + tegra241_cmdqv_sync_vcmdq(cmdqv, index);
>>> +
>>> return true;
>>> }
>>>
>>> @@ -113,13 +157,22 @@ static void
>> tegra241_cmdqv_setup_all_vcmdq(Tegra241CMDQV *cmdqv,
>>> *
>>> * The caller normalizes the MMIO offset such that @offset0 always refers
>>> * to a VCMDQ0_* register, while @index selects the VCMDQ instance.
>>> + *
>>> + * If the VCMDQ is allocated and VINTF page0 is mmap'd, read directly
>>> + * from the VINTF page0 backing. Otherwise, fall back to the cache.
>>> */
>>> static uint64_t tegra241_cmdqv_read_vcmdq_page0(Tegra241CMDQV
>> *cmdqv,
>>> hwaddr offset0, int index,
>>> bool direct)
>>> {
>>> + uint32_t *ptr = tegra241_cmdqv_vintf_ptr(cmdqv, index, offset0);
>>> uint64_t val = 0;
>>>
>>> + if (ptr) {
>> shouldn't you test "direct" as well? I know that later on this is will
>> be backed by another mr but I think this would be clearer.
> Is the suggestion here to just limit this to "direct"? We can't do that.
> Both apertures alias the same registers, and for an allocated and
> mmapped VINTF page0 we access the hw directly.
>
> The vintf page0 gets mapped to the guest only in patch 21, so
> after that only the direct aperture is trapped here anyway.
Yes I agree this would be true only once patch 21 is applied. Sorry for
the noise.
>
>>> + val = *ptr;
>>> + goto out;
>>> + }
>>> +
>>> switch (offset0) {
>>> case A_VCMDQ0_CONS_INDX:
>>> val = cmdqv->vcmdq_cons_indx[index];
>>> @@ -144,6 +197,7 @@ static uint64_t
>> tegra241_cmdqv_read_vcmdq_page0(Tegra241CMDQV *cmdqv,
>>> "%s unhandled read access at 0x%" PRIx64 "\n",
>>> __func__, offset0);
>>> }
>>> +out:
>>> trace_tegra241_cmdqv_read_vcmdq_page0(index, direct ? "direct" : "vi",
>>> offset0, val);
>>> return val;
>>> @@ -220,11 +274,29 @@ static uint64_t
>> tegra241_cmdqv_config_vintf_read(Tegra241CMDQV *cmdqv,
>>> *
>>> * Page 0 registers are all 32-bit; this helper is only called for 4-byte
>>> * writes.
>>> + *
>>> + * If the VCMDQ is allocated and VINTF page0 is mmap'd, write directly
>>> + * to the VINTF page0 backing. Otherwise, update the cache.
>>> */
>>> static void tegra241_cmdqv_write_vcmdq_page0(Tegra241CMDQV
>> *cmdqv,
>>> hwaddr offset0, int index,
>>> uint32_t value, bool direct)
>>> {
>>> + uint32_t *ptr = tegra241_cmdqv_vintf_ptr(cmdqv, index, offset0);
>> same here?
>>> +
>>> + if (ptr) {
>>> + switch (offset0) {
>>> + case A_VCMDQ0_CONS_INDX:
>>> + case A_VCMDQ0_PROD_INDX:
>>> + case A_VCMDQ0_CONFIG:
>>> + case A_VCMDQ0_GERRORN:
>>> + *ptr = value;
>>> + goto out;
>>> + default:
>>> + break;
>>> + }
>>> + }
>>> +
>>> switch (offset0) {
>>> case A_VCMDQ0_CONS_INDX:
>>> cmdqv->vcmdq_cons_indx[index] = value;
>>> @@ -255,6 +327,7 @@ static void
>> tegra241_cmdqv_write_vcmdq_page0(Tegra241CMDQV *cmdqv,
>>> "%s unhandled write access at 0x%" PRIx64 "\n",
>>> __func__, offset0);
>>> }
>>> +out:
>>> trace_tegra241_cmdqv_write_vcmdq_page0(index, direct ? "direct" : "vi",
>> by the way could be interesting to log in the trace event that mmapped
>> was used instead of cache regs.
> Could do.
>
> Thanks,
> Shameer
Thanks
Eric