Re: [PATCH] drm/amdgpu: fix for suspend/resume kiq fence fallback under sriov

2022-11-21 Thread Deucher, Alexander
[Public]

Please add:
Fixes: ec4927d463cb ("drm/amdgpu: fix for suspend/resume sequence under sriov")
With that:
Reviewed-by: Alex Deucher 

From: amd-gfx  on behalf of Shikang Fan 

Sent: Friday, November 18, 2022 4:51 AM
To: amd-gfx@lists.freedesktop.org 
Cc: Fan, Shikang 
Subject: [PATCH] drm/amdgpu: fix for suspend/resume kiq fence fallback under sriov

- In device_resume, SR-IOV interrupt configuration must be done while the VF
  still has full GPU access, so release_full_gpu should be called after kfd_resume.
- Remove the previous workaround for SR-IOV.

Signed-off-by: Shikang Fan 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 23 +++---
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 3d5d5d49cfab..22723b4492a1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4180,21 +4180,15 @@ int amdgpu_device_resume(struct drm_device *dev, bool 
fbcon)

 r = amdgpu_device_ip_resume(adev);

-   /* no matter what r is, always need to properly release full GPU */
-   if (amdgpu_sriov_vf(adev)) {
-   amdgpu_virt_init_data_exchange(adev);
-   amdgpu_virt_release_full_gpu(adev, true);
-   }
-
 if (r) {
 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", 
r);
-   return r;
+   goto exit;
 }
 amdgpu_fence_driver_hw_init(adev);

 r = amdgpu_device_ip_late_init(adev);
 if (r)
-   return r;
+   goto exit;

 queue_delayed_work(system_wq, &adev->delayed_init_work,
msecs_to_jiffies(AMDGPU_RESUME_MS));
@@ -4202,12 +4196,19 @@ int amdgpu_device_resume(struct drm_device *dev, bool 
fbcon)
 if (!adev->in_s0ix) {
 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
 if (r)
-   return r;
+   goto exit;
+   }
+
+exit:
+   if (amdgpu_sriov_vf(adev)) {
+   amdgpu_virt_init_data_exchange(adev);
+   amdgpu_virt_release_full_gpu(adev, true);
 }

+   if (r)
+   return r;
+
 /* Make sure IB tests flushed */
-   if (amdgpu_sriov_vf(adev))
-   amdgpu_irq_gpu_reset_resume_helper(adev);
 flush_delayed_work(&adev->delayed_init_work);

 if (adev->in_s0ix) {
--
2.25.1

<>

[PATCH] drm/amdgpu: fix for suspend/resume kiq fence fallback under sriov

2022-11-18 Thread Shikang Fan
- In device_resume, SR-IOV interrupt configuration must be done while the VF
  still has full GPU access, so release_full_gpu should be called after kfd_resume.
- Remove the previous workaround for SR-IOV.

Signed-off-by: Shikang Fan 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 23 +++---
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 3d5d5d49cfab..22723b4492a1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4180,21 +4180,15 @@ int amdgpu_device_resume(struct drm_device *dev, bool 
fbcon)
 
r = amdgpu_device_ip_resume(adev);
 
-   /* no matter what r is, always need to properly release full GPU */
-   if (amdgpu_sriov_vf(adev)) {
-   amdgpu_virt_init_data_exchange(adev);
-   amdgpu_virt_release_full_gpu(adev, true);
-   }
-
if (r) {
dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
-   return r;
+   goto exit;
}
amdgpu_fence_driver_hw_init(adev);
 
r = amdgpu_device_ip_late_init(adev);
if (r)
-   return r;
+   goto exit;
 
queue_delayed_work(system_wq, &adev->delayed_init_work,
   msecs_to_jiffies(AMDGPU_RESUME_MS));
@@ -4202,12 +4196,19 @@ int amdgpu_device_resume(struct drm_device *dev, bool 
fbcon)
if (!adev->in_s0ix) {
r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
if (r)
-   return r;
+   goto exit;
+   }
+
+exit:
+   if (amdgpu_sriov_vf(adev)) {
+   amdgpu_virt_init_data_exchange(adev);
+   amdgpu_virt_release_full_gpu(adev, true);
}
 
+   if (r)
+   return r;
+
/* Make sure IB tests flushed */
-   if (amdgpu_sriov_vf(adev))
-   amdgpu_irq_gpu_reset_resume_helper(adev);
flush_delayed_work(&adev->delayed_init_work);
 
if (adev->in_s0ix) {
-- 
2.25.1