Re: [PATCH] drm/amdgpu: Program GC registers through RLCG interface in gfx_v11/gmc_v11

2022-10-17 Thread Alex Deucher
On Mon, Oct 17, 2022 at 5:21 AM Yifan Zha  wrote:
>
> [Why]
> L1 blocks most GC register accesses made through MMIO.
>
> [How]
> Use the RLCG interface to program GC registers under an SRIOV VF during full access time.

Acked-by: Alex Deucher 

>
> Signed-off-by: Yifan Zha 
> Reviewed-by: Hawking Zhang 
> ---
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c |  2 +-
>  drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c |  2 +-
>  drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 18 +++---
>  3 files changed, 13 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
> index 0b0a72ca5695..7e80caa05060 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
> @@ -111,7 +111,7 @@ static int init_interrupts_v11(struct amdgpu_device 
> *adev, uint32_t pipe_id)
>
> lock_srbm(adev, mec, pipe, 0, 0);
>
> -   WREG32(SOC15_REG_OFFSET(GC, 0, regCPC_INT_CNTL),
> +   WREG32_SOC15(GC, 0, regCPC_INT_CNTL,
> CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
> CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> index 73106f53246d..e3842dc100d6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> @@ -1571,7 +1571,7 @@ static void gfx_v11_0_init_compute_vmid(struct 
> amdgpu_device *adev)
> WREG32_SOC15(GC, 0, regSH_MEM_BASES, sh_mem_bases);
>
> /* Enable trap for each kfd vmid. */
> -   data = RREG32(SOC15_REG_OFFSET(GC, 0, 
> regSPI_GDBG_PER_VMID_CNTL));
> +   data = RREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL);
> data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 
> 1);
> }
> soc21_grbm_select(adev, 0, 0, 0, 0);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
> index 846ccb6cf07d..66dfb574cc7d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
> @@ -186,6 +186,10 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device 
> *adev, uint32_t vmid,
> /* Use register 17 for GART */
> const unsigned eng = 17;
> unsigned int i;
> +   unsigned char hub_ip = 0;
> +
> +   hub_ip = (vmhub == AMDGPU_GFXHUB_0) ?
> +  GC_HWIP : MMHUB_HWIP;
>
> spin_lock(&adev->gmc.invalidate_lock);
> /*
> @@ -199,8 +203,8 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device 
> *adev, uint32_t vmid,
> if (use_semaphore) {
> for (i = 0; i < adev->usec_timeout; i++) {
> /* a read return value of 1 means semaphore acuqire */
> -   tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem +
> -   hub->eng_distance * eng);
> +   tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
> +   hub->eng_distance * eng, hub_ip);
> if (tmp & 0x1)
> break;
> udelay(1);
> @@ -210,12 +214,12 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device 
> *adev, uint32_t vmid,
> DRM_ERROR("Timeout waiting for sem acquire in VM 
> flush!\n");
> }
>
> -   WREG32_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng, 
> inv_req);
> +   WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng, 
> inv_req, hub_ip);
>
> /* Wait for ACK with a delay.*/
> for (i = 0; i < adev->usec_timeout; i++) {
> -   tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack +
> -   hub->eng_distance * eng);
> +   tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_ack +
> +   hub->eng_distance * eng, hub_ip);
> tmp &= 1 << vmid;
> if (tmp)
> break;
> @@ -229,8 +233,8 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device 
> *adev, uint32_t vmid,
>  * add semaphore release after invalidation,
>  * write with 0 means semaphore release
>  */
> -   WREG32_NO_KIQ(hub->vm_inv_eng0_sem +
> - hub->eng_distance * eng, 0);
> +   WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
> + hub->eng_distance * eng, 0, hub_ip);
>
> /* Issue additional private vm invalidation to MMHUB */
> if ((vmhub != AMDGPU_GFXHUB_0) &&
> --
> 2.25.1
>


[PATCH] drm/amdgpu: Program GC registers through RLCG interface in gfx_v11/gmc_v11

2022-10-17 Thread Yifan Zha
[Why]
L1 blocks most GC register accesses made through MMIO.

[How]
Use the RLCG interface to program GC registers under an SRIOV VF during full access time.

Signed-off-by: Yifan Zha 
Reviewed-by: Hawking Zhang 
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 18 +++---
 3 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
index 0b0a72ca5695..7e80caa05060 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
@@ -111,7 +111,7 @@ static int init_interrupts_v11(struct amdgpu_device *adev, 
uint32_t pipe_id)
 
lock_srbm(adev, mec, pipe, 0, 0);
 
-   WREG32(SOC15_REG_OFFSET(GC, 0, regCPC_INT_CNTL),
+   WREG32_SOC15(GC, 0, regCPC_INT_CNTL,
CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 73106f53246d..e3842dc100d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -1571,7 +1571,7 @@ static void gfx_v11_0_init_compute_vmid(struct 
amdgpu_device *adev)
WREG32_SOC15(GC, 0, regSH_MEM_BASES, sh_mem_bases);
 
/* Enable trap for each kfd vmid. */
-   data = RREG32(SOC15_REG_OFFSET(GC, 0, 
regSPI_GDBG_PER_VMID_CNTL));
+   data = RREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL);
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
}
soc21_grbm_select(adev, 0, 0, 0, 0);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index 846ccb6cf07d..66dfb574cc7d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -186,6 +186,10 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device 
*adev, uint32_t vmid,
/* Use register 17 for GART */
const unsigned eng = 17;
unsigned int i;
+   unsigned char hub_ip = 0;
+
+   hub_ip = (vmhub == AMDGPU_GFXHUB_0) ?
+  GC_HWIP : MMHUB_HWIP;
 
spin_lock(&adev->gmc.invalidate_lock);
/*
@@ -199,8 +203,8 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device 
*adev, uint32_t vmid,
if (use_semaphore) {
for (i = 0; i < adev->usec_timeout; i++) {
/* a read return value of 1 means semaphore acuqire */
-   tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem +
-   hub->eng_distance * eng);
+   tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
+   hub->eng_distance * eng, hub_ip);
if (tmp & 0x1)
break;
udelay(1);
@@ -210,12 +214,12 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device 
*adev, uint32_t vmid,
DRM_ERROR("Timeout waiting for sem acquire in VM 
flush!\n");
}
 
-   WREG32_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req);
+   WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng, 
inv_req, hub_ip);
 
/* Wait for ACK with a delay.*/
for (i = 0; i < adev->usec_timeout; i++) {
-   tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack +
-   hub->eng_distance * eng);
+   tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_ack +
+   hub->eng_distance * eng, hub_ip);
tmp &= 1 << vmid;
if (tmp)
break;
@@ -229,8 +233,8 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device 
*adev, uint32_t vmid,
 * add semaphore release after invalidation,
 * write with 0 means semaphore release
 */
-   WREG32_NO_KIQ(hub->vm_inv_eng0_sem +
- hub->eng_distance * eng, 0);
+   WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
+ hub->eng_distance * eng, 0, hub_ip);
 
/* Issue additional private vm invalidation to MMHUB */
if ((vmhub != AMDGPU_GFXHUB_0) &&
-- 
2.25.1