On 7/16/2024 2:17 PM, Jane Jian wrote:
> For VCN/JPEG 4.0.3, use only the local addressing scheme.
> 
> - Mask off the bits above the AID0 range
> 
> v2:
> - the mmhub case remains using the master XCC
> 
> Signed-off-by: Jane Jian <jane.j...@amd.com>

This patch is

        Reviewed-by: Lijo Lazar <lijo.la...@amd.com>

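A side note for anyone following along: the new NORMALIZE_*_REG_OFFSET() macros simply keep the low 17 bits of the dword register offset, i.e. the local (AID0) aperture, and drop any higher bits that would select a remote AID/XCC. A minimal, stand-alone sketch of that masking (the offset value below is made up purely for illustration):

#include <stdint.h>
#include <stdio.h>

/* Same masking as the patch: keep only the offset within the local
 * (AID0) range; bits 17 and above, which select another AID, are dropped.
 */
#define NORMALIZE_VCN_REG_OFFSET(offset) \
	(offset & 0x1FFFF)

int main(void)
{
	uint32_t reg = 0x25f80;	/* hypothetical offset with an AID bit set */
	uint32_t local = NORMALIZE_VCN_REG_OFFSET(reg);

	printf("0x%05x -> 0x%05x\n", reg, local);	/* 0x25f80 -> 0x05f80 */
	return 0;
}

Offsets that already sit below 0x20000 pass through unchanged.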
Thanks,
Lijo

> ---
>  drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 19 ++++++++--
>  drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c  | 46 ++++++++++++++++++++++--
>  2 files changed, 60 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
> index 30a143ab592d..ad524ddc9760 100644
> --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
> +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
> @@ -32,6 +32,9 @@
>  #include "vcn/vcn_4_0_3_sh_mask.h"
>  #include "ivsrcid/vcn/irqsrcs_vcn_4_0.h"
>  
> +#define NORMALIZE_JPEG_REG_OFFSET(offset) \
> +             (offset & 0x1FFFF)
> +
>  enum jpeg_engin_status {
>       UVD_PGFSM_STATUS__UVDJ_PWR_ON  = 0,
>       UVD_PGFSM_STATUS__UVDJ_PWR_OFF = 2,
> @@ -824,7 +827,13 @@ void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring,
>  void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
>                               uint32_t val, uint32_t mask)
>  {
> -     uint32_t reg_offset = (reg << 2);
> +     uint32_t reg_offset;
> +
> +     /* For VF, only local offsets should be used */
> +     if (amdgpu_sriov_vf(ring->adev))
> +             reg = NORMALIZE_JPEG_REG_OFFSET(reg);
> +
> +     reg_offset = (reg << 2);
>  
>       amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET,
>               0, 0, PACKETJ_TYPE0));
> @@ -865,7 +874,13 @@ void jpeg_v4_0_3_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
>  
>  void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val)
>  {
> -     uint32_t reg_offset = (reg << 2);
> +     uint32_t reg_offset;
> +
> +     /* For VF, only local offsets should be used */
> +     if (amdgpu_sriov_vf(ring->adev))
> +             reg = NORMALIZE_JPEG_REG_OFFSET(reg);
> +
> +     reg_offset = (reg << 2);
>  
>       amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
>               0, 0, PACKETJ_TYPE0));
> diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
> index 101b120f6fbd..9bae95538b62 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
> @@ -45,6 +45,9 @@
>  #define VCN_VID_SOC_ADDRESS_2_0              0x1fb00
>  #define VCN1_VID_SOC_ADDRESS_3_0     0x48300
>  
> +#define NORMALIZE_VCN_REG_OFFSET(offset) \
> +             (offset & 0x1FFFF)
> +
>  static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev);
>  static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev);
>  static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev);
> @@ -1375,6 +1378,43 @@ static uint64_t vcn_v4_0_3_unified_ring_get_wptr(struct amdgpu_ring *ring)
>                                   regUVD_RB_WPTR);
>  }
>  
> +static void vcn_v4_0_3_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
> +                             uint32_t val, uint32_t mask)
> +{
> +     /* For VF, only local offsets should be used */
> +     if (amdgpu_sriov_vf(ring->adev))
> +             reg = NORMALIZE_VCN_REG_OFFSET(reg);
> +
> +     amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT);
> +     amdgpu_ring_write(ring, reg << 2);
> +     amdgpu_ring_write(ring, mask);
> +     amdgpu_ring_write(ring, val);
> +}
> +
> +static void vcn_v4_0_3_enc_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val)
> +{
> +     /* For VF, only local offsets should be used */
> +     if (amdgpu_sriov_vf(ring->adev))
> +             reg = NORMALIZE_VCN_REG_OFFSET(reg);
> +
> +     amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE);
> +     amdgpu_ring_write(ring, reg << 2);
> +     amdgpu_ring_write(ring, val);
> +}
> +
> +static void vcn_v4_0_3_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
> +                             unsigned int vmid, uint64_t pd_addr)
> +{
> +     struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
> +
> +     pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
> +
> +     /* wait for reg writes */
> +     vcn_v4_0_3_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 +
> +                                     vmid * hub->ctx_addr_distance,
> +                                     lower_32_bits(pd_addr), 0xffffffff);
> +}
> +
>  static void vcn_v4_0_3_ring_emit_hdp_flush(struct amdgpu_ring *ring)
>  {
>       /* VCN engine access for HDP flush doesn't work when RRMT is enabled.
> @@ -1421,7 +1461,7 @@ static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = {
>       .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */
>       .emit_ib = vcn_v2_0_enc_ring_emit_ib,
>       .emit_fence = vcn_v2_0_enc_ring_emit_fence,
> -     .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush,
> +     .emit_vm_flush = vcn_v4_0_3_enc_ring_emit_vm_flush,
>       .emit_hdp_flush = vcn_v4_0_3_ring_emit_hdp_flush,
>       .test_ring = amdgpu_vcn_enc_ring_test_ring,
>       .test_ib = amdgpu_vcn_unified_ring_test_ib,
> @@ -1430,8 +1470,8 @@ static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = {
>       .pad_ib = amdgpu_ring_generic_pad_ib,
>       .begin_use = amdgpu_vcn_ring_begin_use,
>       .end_use = amdgpu_vcn_ring_end_use,
> -     .emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
> -     .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
> +     .emit_wreg = vcn_v4_0_3_enc_ring_emit_wreg,
> +     .emit_reg_wait = vcn_v4_0_3_enc_ring_emit_reg_wait,
>       .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
>  };
>  
