RE: [PATCH] drm/amdgpu: fix double ucode load by PSP(v3)

2019-07-31 Thread Zhang, Hawking
No objection from me for this patch. But I was really shocked at first glance 
to the subject and thought how amdgpu driver survive with this bug in 
bare-metal case... It was then proved to be this is SRIOV specific bug because 
psp was initialized ahead of ih in sriov use case. The status.hw fix in 
suspend/resume call stack looks reasonable to me. Patch is

Reviewed-by: Hawking Zhang 

Regards,
Hawking
-Original Message-
From: Liu, Monk  
Sent: 2019年8月1日 11:43
To: Deucher, Alexander ; Koenig, Christian 
; Zhang, Hawking 
Cc: Deng, Emily ; amd-gfx@lists.freedesktop.org
Subject: RE: [PATCH] drm/amdgpu: fix double ucode load by PSP(v3)

If no objection I would submit those three patches, thanks

_
Monk Liu|GPU Virtualization Team |AMD


-Original Message-
From: Deng, Emily 
Sent: Wednesday, July 31, 2019 5:04 PM
To: Liu, Monk ; amd-gfx@lists.freedesktop.org
Cc: Liu, Monk 
Subject: RE: [PATCH] drm/amdgpu: fix double ucode load by PSP(v3)

All looks good to me. Reviewed-by: Emily Deng .

>-Original Message-
>From: amd-gfx  On Behalf Of Monk 
>Liu
>Sent: Wednesday, July 31, 2019 4:54 PM
>To: amd-gfx@lists.freedesktop.org
>Cc: Liu, Monk 
>Subject: [PATCH] drm/amdgpu: fix double ucode load by PSP(v3)
>
>previously the ucode loading of PSP was repreated, one executed in
>phase_1 init/re-init/resume and the other in fw_loading routine
>
>Avoid this double loading by clearing ip_blocks.status.hw in suspend or 
>reset prior to the FW loading and any block's hw_init/resume
>
>v2:
>still do the smu fw loading since it is needed by bare-metal
>
>v3:
>drop the change in reinit_early_sriov, just clear all block's status.hw 
>in the head place and set the status.hw after hw_init done is enough
>
>Signed-off-by: Monk Liu 
>---
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 59
>+++---
> 1 file changed, 38 insertions(+), 21 deletions(-)
>
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>index 6cb358c..30436ba 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>@@ -1673,28 +1673,34 @@ static int amdgpu_device_fw_loading(struct 
>amdgpu_device *adev)
>
>   if (adev->asic_type >= CHIP_VEGA10) {
>   for (i = 0; i < adev->num_ip_blocks; i++) {
>-  if (adev->ip_blocks[i].version->type ==
>AMD_IP_BLOCK_TYPE_PSP) {
>-  if (adev->in_gpu_reset || adev->in_suspend) {
>-  if (amdgpu_sriov_vf(adev) && adev-
>>in_gpu_reset)
>-  break; /* sriov gpu reset, psp
>need to do hw_init before IH because of hw limit */
>-  r = adev->ip_blocks[i].version->funcs-
>>resume(adev);
>-  if (r) {
>-  DRM_ERROR("resume of IP
>block <%s> failed %d\n",
>+  if (adev->ip_blocks[i].version->type !=
>AMD_IP_BLOCK_TYPE_PSP)
>+  continue;
>+
>+  /* no need to do the fw loading again if already
>done*/
>+  if (adev->ip_blocks[i].status.hw == true)
>+  break;
>+
>+  if (adev->in_gpu_reset || adev->in_suspend) {
>+  r = adev->ip_blocks[i].version->funcs-
>>resume(adev);
>+  if (r) {
>+  DRM_ERROR("resume of IP block <%s>
>failed %d\n",
> adev-
>>ip_blocks[i].version->funcs->name, r);
>-  return r;
>-  }
>-  } else {
>-  r = adev->ip_blocks[i].version->funcs-
>>hw_init(adev);
>-  if (r) {
>-  DRM_ERROR("hw_init of IP
>block <%s> failed %d\n",
>-adev->ip_blocks[i].version-
>>funcs->name, r);
>-  return r;
>-  }
>+  return r;
>+  }
>+  } else {
>+  r = adev->ip_blocks[i].version->funcs-
>>hw_init(adev);
>+  if (r) {
>+

RE: [PATCH] drm/amdgpu: fix double ucode load by PSP(v3)

2019-07-31 Thread Liu, Monk
If no objection I would submit those three patches, thanks

_
Monk Liu|GPU Virtualization Team |AMD


-Original Message-
From: Deng, Emily  
Sent: Wednesday, July 31, 2019 5:04 PM
To: Liu, Monk ; amd-gfx@lists.freedesktop.org
Cc: Liu, Monk 
Subject: RE: [PATCH] drm/amdgpu: fix double ucode load by PSP(v3)

All looks good to me. Reviewed-by: Emily Deng .

>-Original Message-
>From: amd-gfx  On Behalf Of Monk 
>Liu
>Sent: Wednesday, July 31, 2019 4:54 PM
>To: amd-gfx@lists.freedesktop.org
>Cc: Liu, Monk 
>Subject: [PATCH] drm/amdgpu: fix double ucode load by PSP(v3)
>
>previously the ucode loading of PSP was repreated, one executed in
>phase_1 init/re-init/resume and the other in fw_loading routine
>
>Avoid this double loading by clearing ip_blocks.status.hw in suspend or 
>reset prior to the FW loading and any block's hw_init/resume
>
>v2:
>still do the smu fw loading since it is needed by bare-metal
>
>v3:
>drop the change in reinit_early_sriov, just clear all block's status.hw 
>in the head place and set the status.hw after hw_init done is enough
>
>Signed-off-by: Monk Liu 
>---
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 59
>+++---
> 1 file changed, 38 insertions(+), 21 deletions(-)
>
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>index 6cb358c..30436ba 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>@@ -1673,28 +1673,34 @@ static int amdgpu_device_fw_loading(struct 
>amdgpu_device *adev)
>
>   if (adev->asic_type >= CHIP_VEGA10) {
>   for (i = 0; i < adev->num_ip_blocks; i++) {
>-  if (adev->ip_blocks[i].version->type ==
>AMD_IP_BLOCK_TYPE_PSP) {
>-  if (adev->in_gpu_reset || adev->in_suspend) {
>-  if (amdgpu_sriov_vf(adev) && adev-
>>in_gpu_reset)
>-  break; /* sriov gpu reset, psp
>need to do hw_init before IH because of hw limit */
>-  r = adev->ip_blocks[i].version->funcs-
>>resume(adev);
>-  if (r) {
>-  DRM_ERROR("resume of IP
>block <%s> failed %d\n",
>+  if (adev->ip_blocks[i].version->type !=
>AMD_IP_BLOCK_TYPE_PSP)
>+  continue;
>+
>+  /* no need to do the fw loading again if already
>done*/
>+  if (adev->ip_blocks[i].status.hw == true)
>+  break;
>+
>+  if (adev->in_gpu_reset || adev->in_suspend) {
>+  r = adev->ip_blocks[i].version->funcs-
>>resume(adev);
>+  if (r) {
>+  DRM_ERROR("resume of IP block <%s>
>failed %d\n",
> adev-
>>ip_blocks[i].version->funcs->name, r);
>-  return r;
>-  }
>-  } else {
>-  r = adev->ip_blocks[i].version->funcs-
>>hw_init(adev);
>-  if (r) {
>-  DRM_ERROR("hw_init of IP
>block <%s> failed %d\n",
>-adev->ip_blocks[i].version-
>>funcs->name, r);
>-  return r;
>-  }
>+  return r;
>+  }
>+  } else {
>+  r = adev->ip_blocks[i].version->funcs-
>>hw_init(adev);
>+  if (r) {
>+  DRM_ERROR("hw_init of IP block <%s>
>failed %d\n",
>+adev-
>>ip_blocks[i].version->funcs->name, r);
>+  return r;
>   }
>-  adev->ip_blocks[i].status.hw = true;
>   }
>+
>+  adev->ip_blocks[i].status.hw = true;
>+  break;
>   }
>   }
>+
>   r = amdgpu_pm_load_smu_firmware(adev, _version);
>
>   return r;
>@@ -213

RE: [PATCH] drm/amdgpu: fix double ucode load by PSP(v3)

2019-07-31 Thread Deng, Emily
All looks good to me. Reviewed-by: Emily Deng .

>-Original Message-
>From: amd-gfx  On Behalf Of Monk
>Liu
>Sent: Wednesday, July 31, 2019 4:54 PM
>To: amd-gfx@lists.freedesktop.org
>Cc: Liu, Monk 
>Subject: [PATCH] drm/amdgpu: fix double ucode load by PSP(v3)
>
>previously the ucode loading of PSP was repreated, one executed in
>phase_1 init/re-init/resume and the other in fw_loading routine
>
>Avoid this double loading by clearing ip_blocks.status.hw in suspend or reset
>prior to the FW loading and any block's hw_init/resume
>
>v2:
>still do the smu fw loading since it is needed by bare-metal
>
>v3:
>drop the change in reinit_early_sriov, just clear all block's status.hw in the
>head place and set the status.hw after hw_init done is enough
>
>Signed-off-by: Monk Liu 
>---
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 59
>+++---
> 1 file changed, 38 insertions(+), 21 deletions(-)
>
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>index 6cb358c..30436ba 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>@@ -1673,28 +1673,34 @@ static int amdgpu_device_fw_loading(struct
>amdgpu_device *adev)
>
>   if (adev->asic_type >= CHIP_VEGA10) {
>   for (i = 0; i < adev->num_ip_blocks; i++) {
>-  if (adev->ip_blocks[i].version->type ==
>AMD_IP_BLOCK_TYPE_PSP) {
>-  if (adev->in_gpu_reset || adev->in_suspend) {
>-  if (amdgpu_sriov_vf(adev) && adev-
>>in_gpu_reset)
>-  break; /* sriov gpu reset, psp
>need to do hw_init before IH because of hw limit */
>-  r = adev->ip_blocks[i].version->funcs-
>>resume(adev);
>-  if (r) {
>-  DRM_ERROR("resume of IP
>block <%s> failed %d\n",
>+  if (adev->ip_blocks[i].version->type !=
>AMD_IP_BLOCK_TYPE_PSP)
>+  continue;
>+
>+  /* no need to do the fw loading again if already
>done*/
>+  if (adev->ip_blocks[i].status.hw == true)
>+  break;
>+
>+  if (adev->in_gpu_reset || adev->in_suspend) {
>+  r = adev->ip_blocks[i].version->funcs-
>>resume(adev);
>+  if (r) {
>+  DRM_ERROR("resume of IP block <%s>
>failed %d\n",
> adev-
>>ip_blocks[i].version->funcs->name, r);
>-  return r;
>-  }
>-  } else {
>-  r = adev->ip_blocks[i].version->funcs-
>>hw_init(adev);
>-  if (r) {
>-  DRM_ERROR("hw_init of IP
>block <%s> failed %d\n",
>-adev->ip_blocks[i].version-
>>funcs->name, r);
>-  return r;
>-  }
>+  return r;
>+  }
>+  } else {
>+  r = adev->ip_blocks[i].version->funcs-
>>hw_init(adev);
>+  if (r) {
>+  DRM_ERROR("hw_init of IP block <%s>
>failed %d\n",
>+adev-
>>ip_blocks[i].version->funcs->name, r);
>+  return r;
>   }
>-  adev->ip_blocks[i].status.hw = true;
>   }
>+
>+  adev->ip_blocks[i].status.hw = true;
>+  break;
>   }
>   }
>+
>   r = amdgpu_pm_load_smu_firmware(adev, _version);
>
>   return r;
>@@ -2136,7 +2142,9 @@ static int
>amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
>   if (r) {
>   DRM_ERROR("suspend of IP block <%s>
>failed %d\n",
> adev->ip_blocks[i].version->funcs-
>>name, r);
>+  return r;
>   }
>+  adev->ip_blocks[i].status.hw = false;
>   }
>   }
>
>@@ -2176,14 +2184,16 @@ static int
>amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
>   if (is_support_sw_smu(adev)) {
>   /* todo */
>   } else if (adev->powerplay.pp_funcs &&
>- adev->powerplay.pp_funcs->set_mp1_state)
>{
>+ adev->powerplay.pp_funcs-
>>set_mp1_state)