On 6/13/2024 7:55 AM, YiPeng Chai wrote:
> 1. Do not add messages to the fifo while the GPU is in reset mode.
> 2. Wake up the page retirement thread only after the message
> has been successfully saved to the fifo.
> 

I think the fifo should still cache the poison requests while in reset. The
page retirement thread could try to acquire the read side of the reset lock
and wait if a reset is in progress.

Thanks
Lijo

> Signed-off-by: YiPeng Chai <yipeng.c...@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 16 ++++++++++------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 18 +++++++++++-------
>  2 files changed, 21 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index d0dcd3d37e6d..ed260966363f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -2093,12 +2093,16 @@ static void amdgpu_ras_interrupt_poison_creation_handler(struct ras_manager *obj
>       if (amdgpu_ip_version(obj->adev, UMC_HWIP, 0) >= IP_VERSION(12, 0, 0)) {
>               struct amdgpu_ras *con = amdgpu_ras_get_context(obj->adev);
>  
> -             amdgpu_ras_put_poison_req(obj->adev,
> -                     AMDGPU_RAS_BLOCK__UMC, 0, NULL, NULL, false);
> -
> -             atomic_inc(&con->page_retirement_req_cnt);
> -
> -             wake_up(&con->page_retirement_wq);
> +             if (!amdgpu_in_reset(obj->adev) && !atomic_read(&con->in_recovery)) {
> +                     int ret;
> +
> +                     ret = amdgpu_ras_put_poison_req(obj->adev,
> +                             AMDGPU_RAS_BLOCK__UMC, 0, NULL, NULL, false);
> +                     if (!ret) {
> +                             atomic_inc(&con->page_retirement_req_cnt);
> +                             wake_up(&con->page_retirement_wq);
> +                     }
> +             }
>       }
>  #endif
>  }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> index 1dbe69eabb9a..94181ae85886 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> @@ -293,16 +293,20 @@ int amdgpu_umc_pasid_poison_handler(struct amdgpu_device *adev,
>  
>                       amdgpu_ras_error_data_fini(&err_data);
>               } else {
> -                             struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
> -
>  #ifdef HAVE_KFIFO_PUT_NON_POINTER
> -                             amdgpu_ras_put_poison_req(adev,
> -                                     block, pasid, pasid_fn, data, reset);
> -#endif
> +                     struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
>  
> -                             atomic_inc(&con->page_retirement_req_cnt);
> +                     if (!amdgpu_in_reset(adev) && !atomic_read(&con->in_recovery)) {
> +                             int ret;
>  
> -                             wake_up(&con->page_retirement_wq);
> +                             ret = amdgpu_ras_put_poison_req(adev,
> +                                     block, pasid, pasid_fn, data, reset);
> +                             if (!ret) {
> +                                     atomic_inc(&con->page_retirement_req_cnt);
> +                                     wake_up(&con->page_retirement_wq);
> +                             }
> +                     }
> +#endif
>               }
>       } else {
>               if (adev->virt.ops && adev->virt.ops->ras_poison_handler)

Reply via email to