On 7/16/2024 1:29 PM, Jane Jian wrote:
> For VCN/JPEG 4.0.3, use only the local addressing scheme.
> 
> - Mask bit higher than AID0 range
> - Remove gmc v9 mmhub vmid replacement, since the bit will be masked later in 
> register write/wait
> 
> Signed-off-by: Jane Jian <jane.j...@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c    |  5 ---
>  drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 19 ++++++++--
>  drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c  | 46 ++++++++++++++++++++++--
>  3 files changed, 60 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index b73136d390cc..2c7b4002ed72 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -844,11 +844,6 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
> *adev, uint32_t vmid,
>       req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
>       ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
>  
> -     if (vmhub >= AMDGPU_MMHUB0(0))
> -             inst = 0;
> -     else
> -             inst = vmhub;
> -

This removal doesn't look correct. `inst` is also used to identify the KIQ
that performs the flush operation, and in the case of MMHUB the flush goes
through the master XCC.

Thanks,
Lijo

>       /* This is necessary for SRIOV as well as for GFXOFF to function
>        * properly under bare metal
>        */
> diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c 
> b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
> index 30a143ab592d..ad524ddc9760 100644
> --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
> +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
> @@ -32,6 +32,9 @@
>  #include "vcn/vcn_4_0_3_sh_mask.h"
>  #include "ivsrcid/vcn/irqsrcs_vcn_4_0.h"
>  
> +#define NORMALIZE_JPEG_REG_OFFSET(offset) \
> +             (offset & 0x1FFFF)
> +
>  enum jpeg_engin_status {
>       UVD_PGFSM_STATUS__UVDJ_PWR_ON  = 0,
>       UVD_PGFSM_STATUS__UVDJ_PWR_OFF = 2,
> @@ -824,7 +827,13 @@ void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring 
> *ring,
>  void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t 
> reg,
>                               uint32_t val, uint32_t mask)
>  {
> -     uint32_t reg_offset = (reg << 2);
> +     uint32_t reg_offset;
> +
> +     /* For VF, only local offsets should be used */
> +     if (amdgpu_sriov_vf(ring->adev))
> +             reg = NORMALIZE_JPEG_REG_OFFSET(reg);
> +
> +     reg_offset = (reg << 2);
>  
>       amdgpu_ring_write(ring, 
> PACKETJ(regUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET,
>               0, 0, PACKETJ_TYPE0));
> @@ -865,7 +874,13 @@ void jpeg_v4_0_3_dec_ring_emit_vm_flush(struct 
> amdgpu_ring *ring,
>  
>  void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 
> uint32_t val)
>  {
> -     uint32_t reg_offset = (reg << 2);
> +     uint32_t reg_offset;
> +
> +     /* For VF, only local offsets should be used */
> +     if (amdgpu_sriov_vf(ring->adev))
> +             reg = NORMALIZE_JPEG_REG_OFFSET(reg);
> +
> +     reg_offset = (reg << 2);
>  
>       amdgpu_ring_write(ring, 
> PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
>               0, 0, PACKETJ_TYPE0));
> diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c 
> b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
> index 101b120f6fbd..9bae95538b62 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
> @@ -45,6 +45,9 @@
>  #define VCN_VID_SOC_ADDRESS_2_0              0x1fb00
>  #define VCN1_VID_SOC_ADDRESS_3_0     0x48300
>  
> +#define NORMALIZE_VCN_REG_OFFSET(offset) \
> +             (offset & 0x1FFFF)
> +
>  static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev);
>  static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev);
>  static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev);
> @@ -1375,6 +1378,43 @@ static uint64_t 
> vcn_v4_0_3_unified_ring_get_wptr(struct amdgpu_ring *ring)
>                                   regUVD_RB_WPTR);
>  }
>  
> +static void vcn_v4_0_3_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, 
> uint32_t reg,
> +                             uint32_t val, uint32_t mask)
> +{
> +     /* For VF, only local offsets should be used */
> +     if (amdgpu_sriov_vf(ring->adev))
> +             reg = NORMALIZE_VCN_REG_OFFSET(reg);
> +
> +     amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT);
> +     amdgpu_ring_write(ring, reg << 2);
> +     amdgpu_ring_write(ring, mask);
> +     amdgpu_ring_write(ring, val);
> +}
> +
> +static void vcn_v4_0_3_enc_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t 
> reg, uint32_t val)
> +{
> +     /* For VF, only local offsets should be used */
> +     if (amdgpu_sriov_vf(ring->adev))
> +             reg = NORMALIZE_VCN_REG_OFFSET(reg);
> +
> +     amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE);
> +     amdgpu_ring_write(ring, reg << 2);
> +     amdgpu_ring_write(ring, val);
> +}
> +
> +static void vcn_v4_0_3_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
> +                             unsigned int vmid, uint64_t pd_addr)
> +{
> +     struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
> +
> +     pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
> +
> +     /* wait for reg writes */
> +     vcn_v4_0_3_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 +
> +                                     vmid * hub->ctx_addr_distance,
> +                                     lower_32_bits(pd_addr), 0xffffffff);
> +}
> +
>  static void vcn_v4_0_3_ring_emit_hdp_flush(struct amdgpu_ring *ring)
>  {
>       /* VCN engine access for HDP flush doesn't work when RRMT is enabled.
> @@ -1421,7 +1461,7 @@ static const struct amdgpu_ring_funcs 
> vcn_v4_0_3_unified_ring_vm_funcs = {
>       .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */
>       .emit_ib = vcn_v2_0_enc_ring_emit_ib,
>       .emit_fence = vcn_v2_0_enc_ring_emit_fence,
> -     .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush,
> +     .emit_vm_flush = vcn_v4_0_3_enc_ring_emit_vm_flush,
>       .emit_hdp_flush = vcn_v4_0_3_ring_emit_hdp_flush,
>       .test_ring = amdgpu_vcn_enc_ring_test_ring,
>       .test_ib = amdgpu_vcn_unified_ring_test_ib,
> @@ -1430,8 +1470,8 @@ static const struct amdgpu_ring_funcs 
> vcn_v4_0_3_unified_ring_vm_funcs = {
>       .pad_ib = amdgpu_ring_generic_pad_ib,
>       .begin_use = amdgpu_vcn_ring_begin_use,
>       .end_use = amdgpu_vcn_ring_end_use,
> -     .emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
> -     .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
> +     .emit_wreg = vcn_v4_0_3_enc_ring_emit_wreg,
> +     .emit_reg_wait = vcn_v4_0_3_enc_ring_emit_reg_wait,
>       .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
>  };
>  

Reply via email to