Re: [PATCH] drm/amdgpu: Add amdgpu suspend-resume code path under SRIOV

2022-09-26 Thread Alex Deucher
On Mon, Sep 26, 2022 at 4:21 PM Bokun Zhang wrote:
>
> - Under SRIOV, we need to send REQ_GPU_FINI to the hypervisor
>   at suspend time. Furthermore, a VF cannot request a mode 1
>   reset under SRIOV, so we skip the reset that is otherwise
>   requested in the suspend_noirq() function.
>
> - In the resume code path, we need to send REQ_GPU_INIT to the
>   hypervisor and also resume the PSP IP block under SRIOV.
>
> Signed-off-by: Bokun Zhang 

Reviewed-by: Alex Deucher 

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c   |  4 
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 27 +-
>  2 files changed, 30 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
> index c5fad52c649d..a5aee19ca30e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
> @@ -1057,6 +1057,10 @@ bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device 
> *adev)
>  {
> if (adev->flags & AMD_IS_APU)
> return false;
> +
> +   if (amdgpu_sriov_vf(adev))
> +   return false;
> +
>  #ifdef HAVE_PM_SUSPEND_TARGET_STATE
> return pm_suspend_target_state != PM_SUSPEND_TO_IDLE;
>  #else
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index fb09dc32b4c0..c5c94ebd3d57 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -3176,7 +3176,8 @@ static int amdgpu_device_ip_resume_phase1(struct 
> amdgpu_device *adev)
> continue;
> if (adev->ip_blocks[i].version->type == 
> AMD_IP_BLOCK_TYPE_COMMON ||
> adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC 
> ||
> -   adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) 
> {
> +   adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH 
> ||
> +   (adev->ip_blocks[i].version->type == 
> AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
>
> r = adev->ip_blocks[i].version->funcs->resume(adev);
> if (r) {
> @@ -4120,12 +4121,20 @@ static void amdgpu_device_evict_resources(struct 
> amdgpu_device *adev)
>  int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
>  {
> struct amdgpu_device *adev = drm_to_adev(dev);
> +   int r = 0;
>
> if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
> return 0;
>
> adev->in_suspend = true;
>
> +   if (amdgpu_sriov_vf(adev)) {
> +   amdgpu_virt_fini_data_exchange(adev);
> +   r = amdgpu_virt_request_full_gpu(adev, false);
> +   if (r)
> +   return r;
> +   }
> +
> if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
> DRM_WARN("smart shift update failed\n");
>
> @@ -4153,6 +4162,9 @@ int amdgpu_device_suspend(struct drm_device *dev, bool 
> fbcon)
>
> amdgpu_device_ip_suspend_phase2(adev);
>
> +   if (amdgpu_sriov_vf(adev))
> +   amdgpu_virt_release_full_gpu(adev, false);
> +
> return 0;
>  }
>
> @@ -4171,6 +4183,12 @@ int amdgpu_device_resume(struct drm_device *dev, bool 
> fbcon)
> struct amdgpu_device *adev = drm_to_adev(dev);
> int r = 0;
>
> +   if (amdgpu_sriov_vf(adev)) {
> +   r = amdgpu_virt_request_full_gpu(adev, true);
> +   if (r)
> +   return r;
> +   }
> +
> if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
> return 0;
>
> @@ -4185,6 +4203,13 @@ int amdgpu_device_resume(struct drm_device *dev, bool 
> fbcon)
> }
>
> r = amdgpu_device_ip_resume(adev);
> +
> +   /* no matter what r is, always need to properly release full GPU */
> +   if (amdgpu_sriov_vf(adev)) {
> +   amdgpu_virt_init_data_exchange(adev);
> +   amdgpu_virt_release_full_gpu(adev, true);
> +   }
> +
> if (r) {
> dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", 
> r);
> return r;
> --
> 2.25.1
>


[PATCH] drm/amdgpu: Add amdgpu suspend-resume code path under SRIOV

2022-09-26 Thread Bokun Zhang
- Under SRIOV, we need to send REQ_GPU_FINI to the hypervisor
  at suspend time. Furthermore, a VF cannot request a mode 1
  reset under SRIOV, so we skip the reset that is otherwise
  requested in the suspend_noirq() function.

- In the resume code path, we need to send REQ_GPU_INIT to the
  hypervisor and also resume the PSP IP block under SRIOV.

Signed-off-by: Bokun Zhang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c   |  4 
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 27 +-
 2 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index c5fad52c649d..a5aee19ca30e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -1057,6 +1057,10 @@ bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device 
*adev)
 {
if (adev->flags & AMD_IS_APU)
return false;
+
+   if (amdgpu_sriov_vf(adev))
+   return false;
+
 #ifdef HAVE_PM_SUSPEND_TARGET_STATE
return pm_suspend_target_state != PM_SUSPEND_TO_IDLE;
 #else
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index fb09dc32b4c0..c5c94ebd3d57 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3176,7 +3176,8 @@ static int amdgpu_device_ip_resume_phase1(struct 
amdgpu_device *adev)
continue;
if (adev->ip_blocks[i].version->type == 
AMD_IP_BLOCK_TYPE_COMMON ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
-   adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
+   adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
+   (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP 
&& amdgpu_sriov_vf(adev))) {
 
r = adev->ip_blocks[i].version->funcs->resume(adev);
if (r) {
@@ -4120,12 +4121,20 @@ static void amdgpu_device_evict_resources(struct 
amdgpu_device *adev)
 int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
 {
struct amdgpu_device *adev = drm_to_adev(dev);
+   int r = 0;
 
if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
return 0;
 
adev->in_suspend = true;
 
+   if (amdgpu_sriov_vf(adev)) {
+   amdgpu_virt_fini_data_exchange(adev);
+   r = amdgpu_virt_request_full_gpu(adev, false);
+   if (r)
+   return r;
+   }
+
if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
DRM_WARN("smart shift update failed\n");
 
@@ -4153,6 +4162,9 @@ int amdgpu_device_suspend(struct drm_device *dev, bool 
fbcon)
 
amdgpu_device_ip_suspend_phase2(adev);
 
+   if (amdgpu_sriov_vf(adev))
+   amdgpu_virt_release_full_gpu(adev, false);
+
return 0;
 }
 
@@ -4171,6 +4183,12 @@ int amdgpu_device_resume(struct drm_device *dev, bool 
fbcon)
struct amdgpu_device *adev = drm_to_adev(dev);
int r = 0;
 
+   if (amdgpu_sriov_vf(adev)) {
+   r = amdgpu_virt_request_full_gpu(adev, true);
+   if (r)
+   return r;
+   }
+
if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
return 0;
 
@@ -4185,6 +4203,13 @@ int amdgpu_device_resume(struct drm_device *dev, bool 
fbcon)
}
 
r = amdgpu_device_ip_resume(adev);
+
+   /* no matter what r is, always need to properly release full GPU */
+   if (amdgpu_sriov_vf(adev)) {
+   amdgpu_virt_init_data_exchange(adev);
+   amdgpu_virt_release_full_gpu(adev, true);
+   }
+
if (r) {
dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
return r;
-- 
2.25.1