RE: [PATCH] drm/amdgpu: fix vulkan test performance drop and hang on VI
> -----Original Message----- > From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf > Of Rex Zhu > Sent: Monday, July 03, 2017 6:13 AM > To: amd-gfx@lists.freedesktop.org > Cc: Zhu, Rex > Subject: [PATCH] drm/amdgpu: fix vulkan test performance drop and hang > on VI > > caused by not programming dynamic_cu_mask_addr in the KIQ MQD. > > v2: create struct vi_mqd_allocation in FB which will contain > 1. PM4 MQD structure. > 2. Write Pointer Poll Memory. > 3. Read Pointer Report Memory. > 4. Dynamic CU Mask. > 5. Dynamic RB Mask. > > Change-Id: I22c840f1bf8d365f7df33a27d6b11e1aea8f2958 > Signed-off-by: Rex Zhu Reviewed-by: Alex Deucher > --- > drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c| 27 ++-- > drivers/gpu/drm/amd/include/vi_structs.h | 268 > +++ > 2 files changed, 285 insertions(+), 10 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > index 1a75ab1..452cc5b 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > @@ -40,7 +40,6 @@ > > #include "bif/bif_5_0_d.h" > #include "bif/bif_5_0_sh_mask.h" > - > #include "gca/gfx_8_0_d.h" > #include "gca/gfx_8_0_enum.h" > #include "gca/gfx_8_0_sh_mask.h" > @@ -2100,7 +2099,7 @@ static int gfx_v8_0_sw_init(void *handle) > return r; > > /* create MQD for all compute queues as well as KIQ for SRIOV case > */ > - r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct > vi_mqd)); > + r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct > vi_mqd_allocation)); > if (r) > return r; > > @@ -4715,9 +4714,6 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring > *ring) > uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; > uint32_t tmp; > > - /* init the mqd struct */ > - memset(mqd, 0, sizeof(struct vi_mqd)); > - > mqd->header = 0xC0310800; > mqd->compute_pipelinestat_enable = 0x00000001; > mqd->compute_static_thread_mgmt_se0 = 0xffffffff; > @@ -4725,7 +4721,12 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring > *ring) > 
mqd->compute_static_thread_mgmt_se2 = 0xffffffff; > mqd->compute_static_thread_mgmt_se3 = 0xffffffff; > mqd->compute_misc_reserved = 0x00000003; > - > + if (!(adev->flags & AMD_IS_APU)) { > + mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring- > >mqd_gpu_addr > + + offsetof(struct > vi_mqd_allocation, dyamic_cu_mask)); > + mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring- > >mqd_gpu_addr > + + offsetof(struct > vi_mqd_allocation, dyamic_cu_mask)); > + } > eop_base_addr = ring->eop_gpu_addr >> 8; > mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; > mqd->cp_hqd_eop_base_addr_hi = > upper_32_bits(eop_base_addr); > @@ -4900,7 +4901,7 @@ static int gfx_v8_0_kiq_init_queue(struct > amdgpu_ring *ring) > if (adev->gfx.in_reset) { /* for GPU_RESET case */ > /* reset MQD to a clean status */ > if (adev->gfx.mec.mqd_backup[mqd_idx]) > - memcpy(mqd, adev- > >gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); > + memcpy(mqd, adev- > >gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); > > /* reset ring buffer */ > ring->wptr = 0; > @@ -4916,6 +4917,9 @@ static int gfx_v8_0_kiq_init_queue(struct > amdgpu_ring *ring) > vi_srbm_select(adev, 0, 0, 0, 0); > mutex_unlock(&adev->srbm_mutex); > } else { > + memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); > + ((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = > 0xFFFFFFFF; > + ((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = > 0xFFFFFFFF; > mutex_lock(&adev->srbm_mutex); > vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); > gfx_v8_0_mqd_init(ring); > @@ -4929,7 +4933,7 @@ static int gfx_v8_0_kiq_init_queue(struct > amdgpu_ring *ring) > mutex_unlock(&adev->srbm_mutex); > > if (adev->gfx.mec.mqd_backup[mqd_idx]) > - memcpy(adev->gfx.mec.mqd_backup[mqd_idx], > mqd, sizeof(*mqd)); > + memcpy(adev->gfx.mec.mqd_backup[mqd_idx], > mqd, sizeof(struct vi_mqd_allocation)); > } > > return r; > @@ -4947,6 +4951,9 @@ sta
[PATCH] drm/amdgpu: fix vulkan test performance drop and hang on VI
caused by not programming dynamic_cu_mask_addr in the KIQ MQD. v2: create struct vi_mqd_allocation in FB which will contain 1. PM4 MQD structure. 2. Write Pointer Poll Memory. 3. Read Pointer Report Memory. 4. Dynamic CU Mask. 5. Dynamic RB Mask. Change-Id: I22c840f1bf8d365f7df33a27d6b11e1aea8f2958 Signed-off-by: Rex Zhu --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c| 27 ++-- drivers/gpu/drm/amd/include/vi_structs.h | 268 +++ 2 files changed, 285 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 1a75ab1..452cc5b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -40,7 +40,6 @@ #include "bif/bif_5_0_d.h" #include "bif/bif_5_0_sh_mask.h" - #include "gca/gfx_8_0_d.h" #include "gca/gfx_8_0_enum.h" #include "gca/gfx_8_0_sh_mask.h" @@ -2100,7 +2099,7 @@ static int gfx_v8_0_sw_init(void *handle) return r; /* create MQD for all compute queues as well as KIQ for SRIOV case */ - r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd)); + r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation)); if (r) return r; @@ -4715,9 +4714,6 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; uint32_t tmp; - /* init the mqd struct */ - memset(mqd, 0, sizeof(struct vi_mqd)); - mqd->header = 0xC0310800; mqd->compute_pipelinestat_enable = 0x00000001; mqd->compute_static_thread_mgmt_se0 = 0xffffffff; @@ -4725,7 +4721,12 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) mqd->compute_static_thread_mgmt_se2 = 0xffffffff; mqd->compute_static_thread_mgmt_se3 = 0xffffffff; mqd->compute_misc_reserved = 0x00000003; - + if (!(adev->flags & AMD_IS_APU)) { + mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr + + offsetof(struct vi_mqd_allocation, dyamic_cu_mask)); + mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr + + offsetof(struct vi_mqd_allocation, dyamic_cu_mask)); + } eop_base_addr = 
ring->eop_gpu_addr >> 8; mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); @@ -4900,7 +4901,7 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) if (adev->gfx.in_reset) { /* for GPU_RESET case */ /* reset MQD to a clean status */ if (adev->gfx.mec.mqd_backup[mqd_idx]) - memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); + memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); /* reset ring buffer */ ring->wptr = 0; @@ -4916,6 +4917,9 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) vi_srbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); } else { + memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); + ((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = 0xFFFFFFFF; + ((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = 0xFFFFFFFF; mutex_lock(&adev->srbm_mutex); vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); gfx_v8_0_mqd_init(ring); @@ -4929,7 +4933,7 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) mutex_unlock(&adev->srbm_mutex); if (adev->gfx.mec.mqd_backup[mqd_idx]) - memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); + memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); } return r; @@ -4947,6 +4951,9 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) int mqd_idx = ring - &adev->gfx.compute_ring[0]; if (!adev->gfx.in_reset && !adev->gfx.in_suspend) { + memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); + ((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = 0xFFFFFFFF; + ((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = 0xFFFFFFFF; mutex_lock(&adev->srbm_mutex); vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); gfx_v8_0_mqd_init(ring); @@ -4954,11 +4961,11 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) mutex_unlock(&adev->srbm_mutex); if (adev->gfx.mec.mqd_backup[mqd_idx]) - memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); + 
memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_alloca
RE: [PATCH] drm/amdgpu: fix vulkan test performance drop and hang on VI
> -----Original Message----- > From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf > Of Rex Zhu > Sent: Monday, June 26, 2017 3:26 AM > To: amd-gfx@lists.freedesktop.org > Cc: Zhu, Rex > Subject: [PATCH] drm/amdgpu: fix vulkan test performance drop and hang > on VI > > caused by not programming dynamic_cu_mask_addr in the KIQ MQD. > > v2: create struct vi_mqd_allocation in FB which will contain > 1. PM4 MQD structure. > 2. Write Pointer Poll Memory. > 3. Read Pointer Report Memory. > 4. Dynamic CU Mask. > 5. Dynamic RB Mask. > > Change-Id: I22c840f1bf8d365f7df33a27d6b11e1aea8f2958 > Signed-off-by: Rex Zhu Reviewed-by: Alex Deucher > --- > drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c| 27 ++-- > drivers/gpu/drm/amd/include/vi_structs.h | 268 > +++ > 2 files changed, 285 insertions(+), 10 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > index 1a75ab1..452cc5b 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > @@ -40,7 +40,6 @@ > > #include "bif/bif_5_0_d.h" > #include "bif/bif_5_0_sh_mask.h" > - > #include "gca/gfx_8_0_d.h" > #include "gca/gfx_8_0_enum.h" > #include "gca/gfx_8_0_sh_mask.h" > @@ -2100,7 +2099,7 @@ static int gfx_v8_0_sw_init(void *handle) > return r; > > /* create MQD for all compute queues as well as KIQ for SRIOV case > */ > - r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct > vi_mqd)); > + r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct > vi_mqd_allocation)); > if (r) > return r; > > @@ -4715,9 +4714,6 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring > *ring) > uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; > uint32_t tmp; > > - /* init the mqd struct */ > - memset(mqd, 0, sizeof(struct vi_mqd)); > - > mqd->header = 0xC0310800; > mqd->compute_pipelinestat_enable = 0x00000001; > mqd->compute_static_thread_mgmt_se0 = 0xffffffff; > @@ -4725,7 +4721,12 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring > *ring) > 
mqd->compute_static_thread_mgmt_se2 = 0xffffffff; > mqd->compute_static_thread_mgmt_se3 = 0xffffffff; > mqd->compute_misc_reserved = 0x00000003; > - > + if (!(adev->flags & AMD_IS_APU)) { > + mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring- > >mqd_gpu_addr > + + offsetof(struct > vi_mqd_allocation, dyamic_cu_mask)); > + mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring- > >mqd_gpu_addr > + + offsetof(struct > vi_mqd_allocation, dyamic_cu_mask)); > + } > eop_base_addr = ring->eop_gpu_addr >> 8; > mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; > mqd->cp_hqd_eop_base_addr_hi = > upper_32_bits(eop_base_addr); > @@ -4900,7 +4901,7 @@ static int gfx_v8_0_kiq_init_queue(struct > amdgpu_ring *ring) > if (adev->gfx.in_reset) { /* for GPU_RESET case */ > /* reset MQD to a clean status */ > if (adev->gfx.mec.mqd_backup[mqd_idx]) > - memcpy(mqd, adev- > >gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); > + memcpy(mqd, adev- > >gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); > > /* reset ring buffer */ > ring->wptr = 0; > @@ -4916,6 +4917,9 @@ static int gfx_v8_0_kiq_init_queue(struct > amdgpu_ring *ring) > vi_srbm_select(adev, 0, 0, 0, 0); > mutex_unlock(&adev->srbm_mutex); > } else { > + memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); > + ((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = > 0xFFFFFFFF; > + ((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = > 0xFFFFFFFF; > mutex_lock(&adev->srbm_mutex); > vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); > gfx_v8_0_mqd_init(ring); > @@ -4929,7 +4933,7 @@ static int gfx_v8_0_kiq_init_queue(struct > amdgpu_ring *ring) > mutex_unlock(&adev->srbm_mutex); > > if (adev->gfx.mec.mqd_backup[mqd_idx]) > - memcpy(adev->gfx.mec.mqd_backup[mqd_idx], > mqd, sizeof(*mqd)); > + memcpy(adev->gfx.mec.mqd_backup[mqd_idx], > mqd, sizeof(struct vi_mqd_allocation)); > } > > return r; > @@ -4947,6 +4951,9 @@ sta
[PATCH] drm/amdgpu: fix vulkan test performance drop and hang on VI
caused by not programming dynamic_cu_mask_addr in the KIQ MQD. v2: create struct vi_mqd_allocation in FB which will contain 1. PM4 MQD structure. 2. Write Pointer Poll Memory. 3. Read Pointer Report Memory. 4. Dynamic CU Mask. 5. Dynamic RB Mask. Change-Id: I22c840f1bf8d365f7df33a27d6b11e1aea8f2958 Signed-off-by: Rex Zhu --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c| 27 ++-- drivers/gpu/drm/amd/include/vi_structs.h | 268 +++ 2 files changed, 285 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 1a75ab1..452cc5b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -40,7 +40,6 @@ #include "bif/bif_5_0_d.h" #include "bif/bif_5_0_sh_mask.h" - #include "gca/gfx_8_0_d.h" #include "gca/gfx_8_0_enum.h" #include "gca/gfx_8_0_sh_mask.h" @@ -2100,7 +2099,7 @@ static int gfx_v8_0_sw_init(void *handle) return r; /* create MQD for all compute queues as well as KIQ for SRIOV case */ - r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd)); + r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation)); if (r) return r; @@ -4715,9 +4714,6 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; uint32_t tmp; - /* init the mqd struct */ - memset(mqd, 0, sizeof(struct vi_mqd)); - mqd->header = 0xC0310800; mqd->compute_pipelinestat_enable = 0x00000001; mqd->compute_static_thread_mgmt_se0 = 0xffffffff; @@ -4725,7 +4721,12 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) mqd->compute_static_thread_mgmt_se2 = 0xffffffff; mqd->compute_static_thread_mgmt_se3 = 0xffffffff; mqd->compute_misc_reserved = 0x00000003; - + if (!(adev->flags & AMD_IS_APU)) { + mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr + + offsetof(struct vi_mqd_allocation, dyamic_cu_mask)); + mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr + + offsetof(struct vi_mqd_allocation, dyamic_cu_mask)); + } eop_base_addr = 
ring->eop_gpu_addr >> 8; mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); @@ -4900,7 +4901,7 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) if (adev->gfx.in_reset) { /* for GPU_RESET case */ /* reset MQD to a clean status */ if (adev->gfx.mec.mqd_backup[mqd_idx]) - memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); + memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); /* reset ring buffer */ ring->wptr = 0; @@ -4916,6 +4917,9 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) vi_srbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); } else { + memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); + ((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = 0xFFFFFFFF; + ((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = 0xFFFFFFFF; mutex_lock(&adev->srbm_mutex); vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); gfx_v8_0_mqd_init(ring); @@ -4929,7 +4933,7 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) mutex_unlock(&adev->srbm_mutex); if (adev->gfx.mec.mqd_backup[mqd_idx]) - memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); + memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); } return r; @@ -4947,6 +4951,9 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) int mqd_idx = ring - &adev->gfx.compute_ring[0]; if (!adev->gfx.in_reset && !adev->gfx.in_suspend) { + memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); + ((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = 0xFFFFFFFF; + ((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = 0xFFFFFFFF; mutex_lock(&adev->srbm_mutex); vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); gfx_v8_0_mqd_init(ring); @@ -4954,11 +4961,11 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) mutex_unlock(&adev->srbm_mutex); if (adev->gfx.mec.mqd_backup[mqd_idx]) - memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); + 
memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_alloca