Am 05.11.19 um 07:32 schrieb Zhu, Changfeng:
> From: changzhu <changfeng....@amd.com>
>
> The GRBM register interface is now capable of bursting 1 cycle per
> register wr->wr, wr->rd much faster than previous multicycle per
> transaction done interface.  This has caused a problem where
> status registers requiring HW to update have a 1 cycle delay, due
> to the register update having to go through GRBM.
>
> For cp ucode, the dummy read has been implemented in cp firmware. It
> covers the use of WAIT_REG_MEM operation 1 case only, so gfx10 needs to
> call gfx_v10_0_wait_reg_mem. Besides, it also needs to add a warning to
> update the firmware in case the firmware is too old to implement the
> dummy read in cp firmware.
>
> For sdma ucode, the dummy read has not been implemented in sdma firmware.
> sdma is moved to gfxhub in gfx10, so the driver needs to add a dummy read
> between amdgpu_ring_emit_wreg and amdgpu_ring_emit_reg_wait for sdma_v5_0.

First of all, thanks for getting your environment set up properly; we are
finally making progress with that issue.

A bunch of nice-to-have comments below and two major bugs/typos which
really need to be fixed.

>
> Change-Id: Ie028f37eb789966d4593984bd661b248ebeb1ac3
> Signed-off-by: changzhu <changfeng....@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  1 +
>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 50 +++++++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   |  7 ++++
>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  |  8 ++--
>   drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c  | 13 ++++++-
>   5 files changed, 73 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 459aa9059542..a74ecd449775 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -267,6 +267,7 @@ struct amdgpu_gfx {
>       uint32_t                        mec2_feature_version;
>       bool                            mec_fw_write_wait;
>       bool                            me_fw_write_wait;
> +     bool                            cp_fw_write_wait;
>       struct amdgpu_ring              gfx_ring[AMDGPU_MAX_GFX_RINGS];
>       unsigned                        num_gfx_rings;
>       struct amdgpu_ring              compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 17a5cbfd0024..814764723c26 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -561,6 +561,32 @@ static void gfx_v10_0_free_microcode(struct 
> amdgpu_device *adev)
>       kfree(adev->gfx.rlc.register_list_format);
>   }
>   
> +static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev)
> +{
> +     adev->gfx.cp_fw_write_wait = false;
> +
> +     switch (adev->asic_type) {
> +     case CHIP_NAVI10:
> +     case CHIP_NAVI12:
> +     case CHIP_NAVI14:
> +             if ((adev->gfx.me_fw_version >= 0x00000046) &&
> +                 (adev->gfx.me_feature_version >= 27) &&
> +                 (adev->gfx.pfp_fw_version >= 0x00000068) &&
> +                 (adev->gfx.pfp_feature_version >= 27) &&
> +                 (adev->gfx.mec_fw_version >= 0x0000005b) &&
> +                 (adev->gfx.mec_feature_version >= 27))
> +                     adev->gfx.cp_fw_write_wait = true;
> +             break;
> +     default:
> +             break;
> +     }
> +
> +     if (adev->gfx.cp_fw_write_wait == false)
> +             DRM_WARN_ONCE("Warning: check cp_fw_version and update it to 
> realize \
> +                               GRBM requires 1-cycle delay in cp 
> firmware\n");
> +}
> +
> +
>   static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
>   {
>       const struct rlc_firmware_header_v2_1 *rlc_hdr;
> @@ -4768,6 +4794,28 @@ static void gfx_v10_0_ring_emit_reg_wait(struct 
> amdgpu_ring *ring, uint32_t reg,
>       gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
>   }
>   
> +static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
> +                                               uint32_t reg0, uint32_t reg1,
> +                                               uint32_t ref, uint32_t mask)
> +{
> +     int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
> +     struct amdgpu_device *adev = ring->adev;
> +     bool fw_version_ok = false;
> +
> +     gfx_v10_0_check_fw_write_wait(adev);
> +
> +     if (ring->funcs->type == AMDGPU_RING_TYPE_GFX ||
> +             ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)

That check is probably superfluous. A few lines below you are using the 
function in the gfx_v10_0_ring_funcs_gfx and 
gfx_v10_0_ring_funcs_compute, so the ring->funcs->type is always constant.

> +             fw_version_ok = adev->gfx.cp_fw_write_wait;
> +
> +     if (fw_version_ok)
> +             gfx_v10_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
> +                                   ref, mask, 0x20);
> +     else
> +             amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
> +                                                        ref, mask);
> +}
> +
>   static void
>   gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
>                                     uint32_t me, uint32_t pipe,
> @@ -5158,6 +5206,7 @@ static const struct amdgpu_ring_funcs 
> gfx_v10_0_ring_funcs_gfx = {
>       .emit_tmz = gfx_v10_0_ring_emit_tmz,
>       .emit_wreg = gfx_v10_0_ring_emit_wreg,
>       .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
> +     .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
>   };
>   
>   static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
> @@ -5191,6 +5240,7 @@ static const struct amdgpu_ring_funcs 
> gfx_v10_0_ring_funcs_compute = {
>       .pad_ib = amdgpu_ring_generic_pad_ib,
>       .emit_wreg = gfx_v10_0_ring_emit_wreg,
>       .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
> +     .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
>   };
>   
>   static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 9d5f900e3e1c..f52fcb895d51 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -982,6 +982,13 @@ static void gfx_v9_0_check_fw_write_wait(struct 
> amdgpu_device *adev)
>       adev->gfx.me_fw_write_wait = false;
>       adev->gfx.mec_fw_write_wait = false;
>   
> +     if ((adev->gfx.mec_fw_version < 0x000001a5) ||
> +         (adev->gfx.mec_feature_version < 46) ||
> +         (adev->gfx.pfp_fw_version < 0x000000b7) ||
> +         (adev->gfx.pfp_feature_version < 46))
> +             DRM_WARN_ONCE("Warning: check cp_fw_version and update it to 
> realize \
> +                                        GRBM requires 1-cycle delay in cp 
> firmware\n");
> +

Not a hard requirement, but it would be nice to have this in a separate
patch, so that the gfx9 and gfx10 changes are clearly separated.

>       switch (adev->asic_type) {
>       case CHIP_VEGA10:
>               if ((adev->gfx.me_fw_version >= 0x0000009c) &&
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 3b00bce14cfb..9ff3ec1531ed 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -344,11 +344,9 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct 
> amdgpu_ring *ring,
>       amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
>                             upper_32_bits(pd_addr));
>   
> -     amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
> -
> -     /* wait for the invalidate to complete */
> -     amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
> -                               1 << vmid, 1 << vmid);
> +     amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
> +                                             hub->vm_inv_eng0_ack,

That register offset is wrong! This needs to be "hub->vm_inv_eng0_ack + 
eng".

> +                                             req, 1 << vmid);
>   
>       return pd_addr;
>   }
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c 
> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> index 3460c00f3eaa..4cf8e3d23c60 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> @@ -1170,6 +1170,16 @@ static void sdma_v5_0_ring_emit_reg_wait(struct 
> amdgpu_ring *ring, uint32_t reg,
>                         SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
>   }
>   
> +static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
> +                                                                             
>                  uint32_t reg0, uint32_t reg1,
> +                                                                             
>                  uint32_t ref, uint32_t mask)
> +{
> +     amdgpu_ring_emit_wreg(ring, reg0, ref);
> +     /* wait for a cycle to reset vm_inv_eng*_ack */
> +     amdgpu_ring_emit_reg_wait(ring, reg0, mask, mask);

Well, that's exactly what won't work. Please use the following instead:

amdgpu_ring_emit_reg_wait(ring, reg1, 0, 0);

Regards,
Christian.

> +     amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
> +}
> +
>   static int sdma_v5_0_early_init(void *handle)
>   {
>       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> @@ -1585,7 +1595,7 @@ static const struct amdgpu_ring_funcs 
> sdma_v5_0_ring_funcs = {
>               6 + /* sdma_v5_0_ring_emit_pipeline_sync */
>               /* sdma_v5_0_ring_emit_vm_flush */
>               SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
> -             SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
> +             SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 +
>               10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, 
> vm fence */
>       .emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */
>       .emit_ib = sdma_v5_0_ring_emit_ib,
> @@ -1599,6 +1609,7 @@ static const struct amdgpu_ring_funcs 
> sdma_v5_0_ring_funcs = {
>       .pad_ib = sdma_v5_0_ring_pad_ib,
>       .emit_wreg = sdma_v5_0_ring_emit_wreg,
>       .emit_reg_wait = sdma_v5_0_ring_emit_reg_wait,
> +     .emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait,
>       .init_cond_exec = sdma_v5_0_ring_init_cond_exec,
>       .patch_cond_exec = sdma_v5_0_ring_patch_cond_exec,
>       .preempt_ib = sdma_v5_0_ring_preempt_ib,

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to