[PATCH] drm/amdgpu: put the audio codec into suspend state before gpu reset V3

2020-04-28 Thread Evan Quan
At default, the autosuspend delay of audio controller is 3S. If the
gpu reset is triggered within 3S(after audio controller idle),
the audio controller may be unable into suspended state. Then
the sudden gpu reset will cause some audio errors. The change
here is targeted to resolve this.

However if the audio controller is in use when the gpu reset
triggered, this change may be still not enough to put the
audio controller into suspend state. Under this case, the
gpu reset will still proceed but there will be a warning
message printed("failed to suspend display audio").

V2: limit this for BACO and mode1 reset only
V3: try 1st to use pm_runtime_autosuspend_expiration() to
query how much time is left. Use default setting on
failure

Change-Id: I33d85e6fcad1882eb33f9cde8916d57be8d5a87a
Signed-off-by: Evan Quan 
Reviewed-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 75 ++
 1 file changed, 75 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index fb4ed0284f12..0a47e60e3c4a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -69,6 +69,7 @@
 
 #include 
 #include 
+#include 
 
 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
 MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
@@ -4145,6 +4146,64 @@ static void amdgpu_device_unlock_adev(struct 
amdgpu_device *adev)
mutex_unlock(&adev->lock_reset);
 }
 
+static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
+{
+   struct pci_dev *p = NULL;
+
+   p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
+   adev->pdev->bus->number, 1);
+   if (p) {
+   pm_runtime_enable(&(p->dev));
+   pm_runtime_resume(&(p->dev));
+   }
+}
+
+static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
+{
+   enum amd_reset_method reset_method;
+   struct pci_dev *p = NULL;
+   u64 expires;
+
+   /*
+* For now, only BACO and mode1 reset are confirmed
+* to suffer the audio issue without proper suspended.
+*/
+   reset_method = amdgpu_asic_reset_method(adev);
+   if ((reset_method != AMD_RESET_METHOD_BACO) &&
+(reset_method != AMD_RESET_METHOD_MODE1))
+   return -EINVAL;
+
+   p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
+   adev->pdev->bus->number, 1);
+   if (!p)
+   return -ENODEV;
+
+   expires = pm_runtime_autosuspend_expiration(&(p->dev));
+   if (!expires)
+   /*
+* If we cannot get the audio device autosuspend delay,
+* a fixed 4S interval will be used. Considering 3S is
+* the audio controller default autosuspend delay setting.
+* 4S used here is guaranteed to cover that.
+*/
+   expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4L;
+
+   while (!pm_runtime_status_suspended(&(p->dev))) {
+   if (!pm_runtime_suspend(&(p->dev)))
+   break;
+
+   if (expires < ktime_get_mono_fast_ns()) {
+   dev_warn(adev->dev, "failed to suspend display 
audio\n");
+   /* TODO: abort the succeeding gpu reset? */
+   return -ETIMEDOUT;
+   }
+   }
+
+   pm_runtime_disable(&(p->dev));
+
+   return 0;
+}
+
 /**
  * amdgpu_device_gpu_recover - reset the asic and recover scheduler
  *
@@ -4169,6 +4228,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
bool use_baco =
(amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ?
true : false;
+   bool audio_suspended = false;
 
/*
 * Flush RAM to disk so that after reboot
@@ -4226,6 +4286,19 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
return 0;
}
 
+   /*
+* Try to put the audio codec into suspend state
+* before gpu reset started.
+*
+* Due to the power domain of the graphics device
+* is shared with AZ power domain. Without this,
+* we may change the audio hardware from behind
+* the audio driver's back. That will trigger
+* some audio codec errors.
+*/
+   if (!amdgpu_device_suspend_display_audio(tmp_adev))
+   audio_suspended = true;
+
amdgpu_ras_set_error_query_ready(tmp_adev, false);
 
cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
@@ -4338,6 +4411,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
/*unlock kfd: SRIOV would do it separately */
if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev))
  

Re: [PATCH] drm/amdgpu: put the audio codec into suspend state before gpu reset V2

2020-04-28 Thread Alex Deucher
On Tue, Apr 28, 2020 at 12:08 AM Quan, Evan  wrote:
>
> Hi Alex,
>
> The pm_runtime_autosuspend_expiration() return 0 due to ->use_autosuspend and 
> autosuspend_delay are all zeros.
> This seems not kernel specific. As I can see this on 5.6-drm-next kernel and 
> ubuntu original 5.3.46 kernel.
> Any insights why that happened?
> And maybe a compromise is: try the pm_runtime_autosuspend_expiration() first. 
> And if failed(report 0), use a fixed interval(3S).

Seems fine.

Alex

>
> Regards,
> Evan
> -Original Message-
> From: Alex Deucher 
> Sent: Wednesday, April 22, 2020 9:35 PM
> To: Quan, Evan 
> Cc: amd-gfx list ; Deucher, Alexander 
> 
> Subject: Re: [PATCH] drm/amdgpu: put the audio codec into suspend state 
> before gpu reset V2
>
> On Tue, Apr 21, 2020 at 10:42 PM Evan Quan  wrote:
> >
> > At default, the autosuspend delay of audio controller is 3S. If the
> > gpu reset is triggered within 3S(after audio controller idle), the
> > audio controller may be unable into suspended state. Then the sudden
> > gpu reset will cause some audio errors. The change here is targeted to
> > resolve this.
> >
> > However if the audio controller is in use when the gpu reset
> > triggered, this change may be still not enough to put the audio
> > controller into suspend state. Under this case, the gpu reset will
> > still proceed but there will be a warning message printed("failed to
> > suspend display audio").
> >
> > V2: limit this for BACO and mode1 reset only
> >
> > Change-Id: I33d85e6fcad1882eb33f9cde8916d57be8d5a87a
> > Signed-off-by: Evan Quan 
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 70
> > ++
> >  1 file changed, 70 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > index 2d4b78d96426..70f43b1aed78 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > @@ -69,6 +69,7 @@
> >
> >  #include 
> >  #include 
> > +#include 
> >
> >  MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
> >  MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
> > @@ -4146,6 +4147,59 @@ static void amdgpu_device_unlock_adev(struct 
> > amdgpu_device *adev)
> > mutex_unlock(&adev->lock_reset);  }
> >
> > +static void amdgpu_device_resume_display_audio(struct amdgpu_device
> > +*adev) {
> > +   struct pci_dev *p = NULL;
> > +
> > +   p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
> > +   adev->pdev->bus->number, 1);
> > +   if (p) {
> > +   pm_runtime_enable(&(p->dev));
> > +   pm_runtime_resume(&(p->dev));
> > +   }
> > +}
> > +
> > +static int amdgpu_device_suspend_display_audio(struct amdgpu_device
> > +*adev) {
> > +   enum amd_reset_method reset_method;
> > +   struct pci_dev *p = NULL;
> > +   unsigned long end_jiffies;
> > +
> > +   /*
> > +* For now, only BACO and mode1 reset are confirmed
> > +* to suffer the audio issue without proper suspended.
> > +*/
> > +   reset_method = amdgpu_asic_reset_method(adev);
> > +   if ((reset_method != AMD_RESET_METHOD_BACO) &&
> > +(reset_method != AMD_RESET_METHOD_MODE1))
> > +   return -EINVAL;
> > +
> > +   p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
> > +   adev->pdev->bus->number, 1);
> > +   if (!p)
> > +   return -ENODEV;
> > +
> > +   /*
> > +* 3S is the audio controller default autosuspend delay setting.
> > +* 4S used here is guaranteed to cover that.
> > +*/
>
> Instead of hardcoding 3S, we should probably use
> pm_runtime_autosuspend_expiration() to query how much time is left and then 
> use that.  That way this will work even if userspace has changed the delay.  
> With that fixed:
> Reviewed-by: Alex Deucher 
>
> Alex
>
>
> > +   end_jiffies = msecs_to_jiffies(4000) + jiffies;
> > +   while (!pm_runtime_status_suspended(&(p->dev))) {
> > +   if (!pm_runtime_suspend(&(p->dev)))
> > +   break;
> > +
> > +   if (time_after(jiffies, end_jiffies)) {
> > +   dev_warn(adev->dev, "failed to suspend

RE: [PATCH] drm/amdgpu: put the audio codec into suspend state before gpu reset V2

2020-04-27 Thread Quan, Evan
Hi Alex,

The pm_runtime_autosuspend_expiration() return 0 due to ->use_autosuspend and 
autosuspend_delay are all zeros.
This seems not kernel specific. As I can see this on 5.6-drm-next kernel and 
ubuntu original 5.3.46 kernel.
Any insights why that happened?
And maybe a compromise is: try the pm_runtime_autosuspend_expiration() first. 
And if failed(report 0), use a fixed interval(3S).

Regards,
Evan
-Original Message-
From: Alex Deucher  
Sent: Wednesday, April 22, 2020 9:35 PM
To: Quan, Evan 
Cc: amd-gfx list ; Deucher, Alexander 

Subject: Re: [PATCH] drm/amdgpu: put the audio codec into suspend state before 
gpu reset V2

On Tue, Apr 21, 2020 at 10:42 PM Evan Quan  wrote:
>
> At default, the autosuspend delay of audio controller is 3S. If the 
> gpu reset is triggered within 3S(after audio controller idle), the 
> audio controller may be unable into suspended state. Then the sudden 
> gpu reset will cause some audio errors. The change here is targeted to 
> resolve this.
>
> However if the audio controller is in use when the gpu reset 
> triggered, this change may be still not enough to put the audio 
> controller into suspend state. Under this case, the gpu reset will 
> still proceed but there will be a warning message printed("failed to 
> suspend display audio").
>
> V2: limit this for BACO and mode1 reset only
>
> Change-Id: I33d85e6fcad1882eb33f9cde8916d57be8d5a87a
> Signed-off-by: Evan Quan 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 70 
> ++
>  1 file changed, 70 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 2d4b78d96426..70f43b1aed78 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -69,6 +69,7 @@
>
>  #include 
>  #include 
> +#include 
>
>  MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
>  MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
> @@ -4146,6 +4147,59 @@ static void amdgpu_device_unlock_adev(struct 
> amdgpu_device *adev)
> mutex_unlock(&adev->lock_reset);  }
>
> +static void amdgpu_device_resume_display_audio(struct amdgpu_device 
> +*adev) {
> +   struct pci_dev *p = NULL;
> +
> +   p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
> +   adev->pdev->bus->number, 1);
> +   if (p) {
> +   pm_runtime_enable(&(p->dev));
> +   pm_runtime_resume(&(p->dev));
> +   }
> +}
> +
> +static int amdgpu_device_suspend_display_audio(struct amdgpu_device 
> +*adev) {
> +   enum amd_reset_method reset_method;
> +   struct pci_dev *p = NULL;
> +   unsigned long end_jiffies;
> +
> +   /*
> +* For now, only BACO and mode1 reset are confirmed
> +* to suffer the audio issue without proper suspended.
> +*/
> +   reset_method = amdgpu_asic_reset_method(adev);
> +   if ((reset_method != AMD_RESET_METHOD_BACO) &&
> +(reset_method != AMD_RESET_METHOD_MODE1))
> +   return -EINVAL;
> +
> +   p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
> +   adev->pdev->bus->number, 1);
> +   if (!p)
> +   return -ENODEV;
> +
> +   /*
> +* 3S is the audio controller default autosuspend delay setting.
> +* 4S used here is guaranteed to cover that.
> +*/

Instead of hardcoding 3S, we should probably use
pm_runtime_autosuspend_expiration() to query how much time is left and then use 
that.  That way this will work even if userspace has changed the delay.  With 
that fixed:
Reviewed-by: Alex Deucher 

Alex


> +   end_jiffies = msecs_to_jiffies(4000) + jiffies;
> +   while (!pm_runtime_status_suspended(&(p->dev))) {
> +   if (!pm_runtime_suspend(&(p->dev)))
> +   break;
> +
> +   if (time_after(jiffies, end_jiffies)) {
> +   dev_warn(adev->dev, "failed to suspend display 
> audio\n");
> +   /* TODO: abort the succeeding gpu reset? */
> +   return -ETIMEDOUT;
> +   }
> +   }
> +
> +   pm_runtime_disable(&(p->dev));
> +
> +   return 0;
> +}
> +
>  /**
>   * amdgpu_device_gpu_recover - reset the asic and recover scheduler
>   *
> @@ -4170,6 +4224,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device 
> *adev,
> bool use_baco =
> (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ?
> true : false;
> +   

Re: [PATCH] drm/amdgpu: put the audio codec into suspend state before gpu reset V2

2020-04-22 Thread Alex Deucher
On Tue, Apr 21, 2020 at 10:42 PM Evan Quan  wrote:
>
> At default, the autosuspend delay of audio controller is 3S. If the
> gpu reset is triggered within 3S(after audio controller idle),
> the audio controller may be unable into suspended state. Then
> the sudden gpu reset will cause some audio errors. The change
> here is targeted to resolve this.
>
> However if the audio controller is in use when the gpu reset
> triggered, this change may be still not enough to put the
> audio controller into suspend state. Under this case, the
> gpu reset will still proceed but there will be a warning
> message printed("failed to suspend display audio").
>
> V2: limit this for BACO and mode1 reset only
>
> Change-Id: I33d85e6fcad1882eb33f9cde8916d57be8d5a87a
> Signed-off-by: Evan Quan 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 70 ++
>  1 file changed, 70 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 2d4b78d96426..70f43b1aed78 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -69,6 +69,7 @@
>
>  #include 
>  #include 
> +#include 
>
>  MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
>  MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
> @@ -4146,6 +4147,59 @@ static void amdgpu_device_unlock_adev(struct 
> amdgpu_device *adev)
> mutex_unlock(&adev->lock_reset);
>  }
>
> +static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
> +{
> +   struct pci_dev *p = NULL;
> +
> +   p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
> +   adev->pdev->bus->number, 1);
> +   if (p) {
> +   pm_runtime_enable(&(p->dev));
> +   pm_runtime_resume(&(p->dev));
> +   }
> +}
> +
> +static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
> +{
> +   enum amd_reset_method reset_method;
> +   struct pci_dev *p = NULL;
> +   unsigned long end_jiffies;
> +
> +   /*
> +* For now, only BACO and mode1 reset are confirmed
> +* to suffer the audio issue without proper suspended.
> +*/
> +   reset_method = amdgpu_asic_reset_method(adev);
> +   if ((reset_method != AMD_RESET_METHOD_BACO) &&
> +(reset_method != AMD_RESET_METHOD_MODE1))
> +   return -EINVAL;
> +
> +   p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
> +   adev->pdev->bus->number, 1);
> +   if (!p)
> +   return -ENODEV;
> +
> +   /*
> +* 3S is the audio controller default autosuspend delay setting.
> +* 4S used here is guaranteed to cover that.
> +*/

Instead of hardcoding 3S, we should probably use
pm_runtime_autosuspend_expiration() to query how much time is left and
then use that.  That way this will work even if userspace has changed
the delay.  With that fixed:
Reviewed-by: Alex Deucher 

Alex


> +   end_jiffies = msecs_to_jiffies(4000) + jiffies;
> +   while (!pm_runtime_status_suspended(&(p->dev))) {
> +   if (!pm_runtime_suspend(&(p->dev)))
> +   break;
> +
> +   if (time_after(jiffies, end_jiffies)) {
> +   dev_warn(adev->dev, "failed to suspend display 
> audio\n");
> +   /* TODO: abort the succeeding gpu reset? */
> +   return -ETIMEDOUT;
> +   }
> +   }
> +
> +   pm_runtime_disable(&(p->dev));
> +
> +   return 0;
> +}
> +
>  /**
>   * amdgpu_device_gpu_recover - reset the asic and recover scheduler
>   *
> @@ -4170,6 +4224,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device 
> *adev,
> bool use_baco =
> (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ?
> true : false;
> +   bool audio_suspended = false;
>
> /*
>  * Flush RAM to disk so that after reboot
> @@ -4227,6 +4282,19 @@ int amdgpu_device_gpu_recover(struct amdgpu_device 
> *adev,
> return 0;
> }
>
> +   /*
> +* Try to put the audio codec into suspend state
> +* before gpu reset started.
> +*
> +* Due to the power domain of the graphics device
> +* is shared with AZ power domain. Without this,
> +* we may change the audio hardware from behind
> +* the audio driver's back. That will trigger
> +* some audio codec errors.
> +*/
> +   if (!amdgpu_device_suspend_display_audio(tmp_adev))
> +   audio_suspended = true;
> +
> amdgpu_ras_set_error_query_ready(tmp_adev, false);
>
> cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
> @@ -4339,6 +4407,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device 
> *adev,
>   

RE: [PATCH] drm/amdgpu: put the audio codec into suspend state before gpu reset

2020-04-21 Thread Quan, Evan
Mode1 reset is also affected as I confirmed on navi10 unfortunately.
That is why the original design(switch to mode1 reset on audio suspended 
failure) over our previous discussions was not taken.
Anyway, I sent out a V2 patch to limit this for baco and mode1 reset only.

Regards,
Evan
-Original Message-
From: Alex Deucher  
Sent: Wednesday, April 22, 2020 4:18 AM
To: Quan, Evan 
Cc: amd-gfx list ; Deucher, Alexander 

Subject: Re: [PATCH] drm/amdgpu: put the audio codec into suspend state before 
gpu reset

On Tue, Apr 21, 2020 at 8:00 AM Evan Quan  wrote:
>
> At default, the autosuspend delay of audio controller is 3S. If the 
> gpu reset is triggered within 3S(after audio controller idle), the 
> audio controller may be unable into suspended state. Then the sudden 
> gpu reset will cause some audio errors. The change here is targeted to 
> resolve this.
>
> However if the audio controller is in use when the gpu reset 
> triggered, this change may be still not enough to put the audio 
> controller into suspend state. Under this case, the gpu reset will 
> still proceed but there will be a warning message printed("failed to 
> suspend display audio").
>
> Change-Id: I33d85e6fcad1882eb33f9cde8916d57be8d5a87a
> Signed-off-by: Evan Quan 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 60 
> ++
>  1 file changed, 60 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 2d4b78d96426..983e294d0300 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -69,6 +69,7 @@
>
>  #include 
>  #include 
> +#include 
>
>  MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
>  MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
> @@ -4146,6 +4147,49 @@ static void amdgpu_device_unlock_adev(struct 
> amdgpu_device *adev)
> mutex_unlock(&adev->lock_reset);  }
>
> +static void amdgpu_device_resume_display_audio(struct amdgpu_device 
> +*adev) {
> +   struct pci_dev *p = NULL;
> +
> +   p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
> +   adev->pdev->bus->number, 1);
> +   if (p) {
> +   pm_runtime_enable(&(p->dev));
> +   pm_runtime_resume(&(p->dev));
> +   }
> +}
> +
> +static int amdgpu_device_suspend_display_audio(struct amdgpu_device 
> +*adev) {
> +   struct pci_dev *p = NULL;
> +   unsigned long end_jiffies;
> +
> +   p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
> +   adev->pdev->bus->number, 1);
> +   if (!p)
> +   return -ENODEV;
> +

With mode1 reset affect this or just BACO?  If it's just BACO, we should check 
the reset method here and return an error if not using BACO.

Alex


> +   /*
> +* 3S is the audio controller default autosuspend delay setting.
> +* 4S used here is guaranteed to cover that.
> +*/
> +   end_jiffies = msecs_to_jiffies(4000) + jiffies;
> +   while (!pm_runtime_status_suspended(&(p->dev))) {
> +   if (!pm_runtime_suspend(&(p->dev)))
> +   break;
> +
> +   if (time_after(jiffies, end_jiffies)) {
> +   dev_warn(adev->dev, "failed to suspend display 
> audio\n");
> +   /* TODO: abort the succeeding gpu reset? */
> +   return -ETIMEDOUT;
> +   }
> +   }
> +
> +   pm_runtime_disable(&(p->dev));
> +
> +   return 0;
> +}
> +
>  /**
>   * amdgpu_device_gpu_recover - reset the asic and recover scheduler
>   *
> @@ -4170,6 +4214,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device 
> *adev,
> bool use_baco =
> (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ?
> true : false;
> +   bool audio_suspended = false;
>
> /*
>  * Flush RAM to disk so that after reboot @@ -4227,6 +4272,19 
> @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
> return 0;
> }
>
> +   /*
> +* Try to put the audio codec into suspend state
> +* before gpu reset started.
> +*
> +* Due to the power domain of the graphics device
> +* is shared with AZ power domain. Without this,
> +* we may change the audio hardware from behind
> +* the audio driver's back. That will trigger
> +

[PATCH] drm/amdgpu: put the audio codec into suspend state before gpu reset V2

2020-04-21 Thread Evan Quan
At default, the autosuspend delay of audio controller is 3S. If the
gpu reset is triggered within 3S(after audio controller idle),
the audio controller may be unable into suspended state. Then
the sudden gpu reset will cause some audio errors. The change
here is targeted to resolve this.

However if the audio controller is in use when the gpu reset
triggered, this change may be still not enough to put the
audio controller into suspend state. Under this case, the
gpu reset will still proceed but there will be a warning
message printed("failed to suspend display audio").

V2: limit this for BACO and mode1 reset only

Change-Id: I33d85e6fcad1882eb33f9cde8916d57be8d5a87a
Signed-off-by: Evan Quan 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 70 ++
 1 file changed, 70 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 2d4b78d96426..70f43b1aed78 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -69,6 +69,7 @@
 
 #include 
 #include 
+#include 
 
 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
 MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
@@ -4146,6 +4147,59 @@ static void amdgpu_device_unlock_adev(struct 
amdgpu_device *adev)
mutex_unlock(&adev->lock_reset);
 }
 
+static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
+{
+   struct pci_dev *p = NULL;
+
+   p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
+   adev->pdev->bus->number, 1);
+   if (p) {
+   pm_runtime_enable(&(p->dev));
+   pm_runtime_resume(&(p->dev));
+   }
+}
+
+static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
+{
+   enum amd_reset_method reset_method;
+   struct pci_dev *p = NULL;
+   unsigned long end_jiffies;
+
+   /*
+* For now, only BACO and mode1 reset are confirmed
+* to suffer the audio issue without proper suspended.
+*/
+   reset_method = amdgpu_asic_reset_method(adev);
+   if ((reset_method != AMD_RESET_METHOD_BACO) &&
+(reset_method != AMD_RESET_METHOD_MODE1))
+   return -EINVAL;
+
+   p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
+   adev->pdev->bus->number, 1);
+   if (!p)
+   return -ENODEV;
+
+   /*
+* 3S is the audio controller default autosuspend delay setting.
+* 4S used here is guaranteed to cover that.
+*/
+   end_jiffies = msecs_to_jiffies(4000) + jiffies;
+   while (!pm_runtime_status_suspended(&(p->dev))) {
+   if (!pm_runtime_suspend(&(p->dev)))
+   break;
+
+   if (time_after(jiffies, end_jiffies)) {
+   dev_warn(adev->dev, "failed to suspend display 
audio\n");
+   /* TODO: abort the succeeding gpu reset? */
+   return -ETIMEDOUT;
+   }
+   }
+
+   pm_runtime_disable(&(p->dev));
+
+   return 0;
+}
+
 /**
  * amdgpu_device_gpu_recover - reset the asic and recover scheduler
  *
@@ -4170,6 +4224,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
bool use_baco =
(amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ?
true : false;
+   bool audio_suspended = false;
 
/*
 * Flush RAM to disk so that after reboot
@@ -4227,6 +4282,19 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
return 0;
}
 
+   /*
+* Try to put the audio codec into suspend state
+* before gpu reset started.
+*
+* Due to the power domain of the graphics device
+* is shared with AZ power domain. Without this,
+* we may change the audio hardware from behind
+* the audio driver's back. That will trigger
+* some audio codec errors.
+*/
+   if (!amdgpu_device_suspend_display_audio(tmp_adev))
+   audio_suspended = true;
+
amdgpu_ras_set_error_query_ready(tmp_adev, false);
 
cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
@@ -4339,6 +4407,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
/*unlock kfd: SRIOV would do it separately */
if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev))
amdgpu_amdkfd_post_reset(tmp_adev);
+   if (audio_suspended)
+   amdgpu_device_resume_display_audio(tmp_adev);
amdgpu_device_unlock_adev(tmp_adev);
}
 
-- 
2.26.2

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdgpu: put the audio codec into suspend state before gpu reset

2020-04-21 Thread Alex Deucher
On Tue, Apr 21, 2020 at 8:00 AM Evan Quan  wrote:
>
> At default, the autosuspend delay of audio controller is 3S. If the
> gpu reset is triggered within 3S(after audio controller idle),
> the audio controller may be unable into suspended state. Then
> the sudden gpu reset will cause some audio errors. The change
> here is targeted to resolve this.
>
> However if the audio controller is in use when the gpu reset
> triggered, this change may be still not enough to put the
> audio controller into suspend state. Under this case, the
> gpu reset will still proceed but there will be a warning
> message printed("failed to suspend display audio").
>
> Change-Id: I33d85e6fcad1882eb33f9cde8916d57be8d5a87a
> Signed-off-by: Evan Quan 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 60 ++
>  1 file changed, 60 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 2d4b78d96426..983e294d0300 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -69,6 +69,7 @@
>
>  #include 
>  #include 
> +#include 
>
>  MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
>  MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
> @@ -4146,6 +4147,49 @@ static void amdgpu_device_unlock_adev(struct 
> amdgpu_device *adev)
> mutex_unlock(&adev->lock_reset);
>  }
>
> +static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
> +{
> +   struct pci_dev *p = NULL;
> +
> +   p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
> +   adev->pdev->bus->number, 1);
> +   if (p) {
> +   pm_runtime_enable(&(p->dev));
> +   pm_runtime_resume(&(p->dev));
> +   }
> +}
> +
> +static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
> +{
> +   struct pci_dev *p = NULL;
> +   unsigned long end_jiffies;
> +
> +   p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
> +   adev->pdev->bus->number, 1);
> +   if (!p)
> +   return -ENODEV;
> +

With mode1 reset affect this or just BACO?  If it's just BACO, we
should check the reset method here and return an error if not using
BACO.

Alex


> +   /*
> +* 3S is the audio controller default autosuspend delay setting.
> +* 4S used here is guaranteed to cover that.
> +*/
> +   end_jiffies = msecs_to_jiffies(4000) + jiffies;
> +   while (!pm_runtime_status_suspended(&(p->dev))) {
> +   if (!pm_runtime_suspend(&(p->dev)))
> +   break;
> +
> +   if (time_after(jiffies, end_jiffies)) {
> +   dev_warn(adev->dev, "failed to suspend display 
> audio\n");
> +   /* TODO: abort the succeeding gpu reset? */
> +   return -ETIMEDOUT;
> +   }
> +   }
> +
> +   pm_runtime_disable(&(p->dev));
> +
> +   return 0;
> +}
> +
>  /**
>   * amdgpu_device_gpu_recover - reset the asic and recover scheduler
>   *
> @@ -4170,6 +4214,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device 
> *adev,
> bool use_baco =
> (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ?
> true : false;
> +   bool audio_suspended = false;
>
> /*
>  * Flush RAM to disk so that after reboot
> @@ -4227,6 +4272,19 @@ int amdgpu_device_gpu_recover(struct amdgpu_device 
> *adev,
> return 0;
> }
>
> +   /*
> +* Try to put the audio codec into suspend state
> +* before gpu reset started.
> +*
> +* Due to the power domain of the graphics device
> +* is shared with AZ power domain. Without this,
> +* we may change the audio hardware from behind
> +* the audio driver's back. That will trigger
> +* some audio codec errors.
> +*/
> +   if (!amdgpu_device_suspend_display_audio(tmp_adev))
> +   audio_suspended = true;
> +
> amdgpu_ras_set_error_query_ready(tmp_adev, false);
>
> cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
> @@ -4339,6 +4397,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device 
> *adev,
> /*unlock kfd: SRIOV would do it separately */
> if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev))
> amdgpu_amdkfd_post_reset(tmp_adev);
> +   if (audio_suspended)
> +   amdgpu_device_resume_display_audio(tmp_adev);
> amdgpu_device_unlock_adev(tmp_adev);
> }
>
> --
> 2.26.2
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdgpu: put the audio codec into suspend state before gpu reset

2020-04-21 Thread Evan Quan
At default, the autosuspend delay of audio controller is 3S. If the
gpu reset is triggered within 3S(after audio controller idle),
the audio controller may be unable into suspended state. Then
the sudden gpu reset will cause some audio errors. The change
here is targeted to resolve this.

However if the audio controller is in use when the gpu reset
triggered, this change may be still not enough to put the
audio controller into suspend state. Under this case, the
gpu reset will still proceed but there will be a warning
message printed("failed to suspend display audio").

Change-Id: I33d85e6fcad1882eb33f9cde8916d57be8d5a87a
Signed-off-by: Evan Quan 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 60 ++
 1 file changed, 60 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 2d4b78d96426..983e294d0300 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -69,6 +69,7 @@
 
 #include 
 #include 
+#include 
 
 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
 MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
@@ -4146,6 +4147,49 @@ static void amdgpu_device_unlock_adev(struct 
amdgpu_device *adev)
mutex_unlock(&adev->lock_reset);
 }
 
+static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
+{
+   struct pci_dev *p = NULL;
+
+   p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
+   adev->pdev->bus->number, 1);
+   if (p) {
+   pm_runtime_enable(&(p->dev));
+   pm_runtime_resume(&(p->dev));
+   }
+}
+
+static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
+{
+   struct pci_dev *p = NULL;
+   unsigned long end_jiffies;
+
+   p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
+   adev->pdev->bus->number, 1);
+   if (!p)
+   return -ENODEV;
+
+   /*
+* 3S is the audio controller default autosuspend delay setting.
+* 4S used here is guaranteed to cover that.
+*/
+   end_jiffies = msecs_to_jiffies(4000) + jiffies;
+   while (!pm_runtime_status_suspended(&(p->dev))) {
+   if (!pm_runtime_suspend(&(p->dev)))
+   break;
+
+   if (time_after(jiffies, end_jiffies)) {
+   dev_warn(adev->dev, "failed to suspend display 
audio\n");
+   /* TODO: abort the succeeding gpu reset? */
+   return -ETIMEDOUT;
+   }
+   }
+
+   pm_runtime_disable(&(p->dev));
+
+   return 0;
+}
+
 /**
  * amdgpu_device_gpu_recover - reset the asic and recover scheduler
  *
@@ -4170,6 +4214,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
bool use_baco =
(amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ?
true : false;
+   bool audio_suspended = false;
 
/*
 * Flush RAM to disk so that after reboot
@@ -4227,6 +4272,19 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
return 0;
}
 
+   /*
+* Try to put the audio codec into suspend state
+* before gpu reset started.
+*
+* Due to the power domain of the graphics device
+* is shared with AZ power domain. Without this,
+* we may change the audio hardware from behind
+* the audio driver's back. That will trigger
+* some audio codec errors.
+*/
+   if (!amdgpu_device_suspend_display_audio(tmp_adev))
+   audio_suspended = true;
+
amdgpu_ras_set_error_query_ready(tmp_adev, false);
 
cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
@@ -4339,6 +4397,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
/*unlock kfd: SRIOV would do it separately */
if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev))
amdgpu_amdkfd_post_reset(tmp_adev);
+   if (audio_suspended)
+   amdgpu_device_resume_display_audio(tmp_adev);
amdgpu_device_unlock_adev(tmp_adev);
}
 
-- 
2.26.2

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx