On 1/16/26 17:20, Alex Deucher wrote:
> If we need to allocate a job during GPU reset, use
> GFP_ATOMIC rather than GFP_KERNEL.
> 
> Signed-off-by: Alex Deucher <[email protected]>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c     | 2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c    | 9 ++++++---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 3 ++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c     | 6 ++++--
>  4 files changed, 13 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index 72ec455fa932c..136e50de712a0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -68,7 +68,7 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>       int r;
>  
>       if (size) {
> -             r = amdgpu_sa_bo_new(&adev->ib_pools[pool_type],
> +             r = amdgpu_sa_bo_new(adev, &adev->ib_pools[pool_type],
>                                    &ib->sa_bo, size);
>               if (r) {
>                       dev_err(adev->dev, "failed to get a new IB (%d)\n", r);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> index 1daa9145b217e..c7e4d79b9f61d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> @@ -192,18 +192,21 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>       if (num_ibs == 0)
>               return -EINVAL;
>  
> -     *job = kzalloc(struct_size(*job, ibs, num_ibs), GFP_KERNEL);
> +     *job = kzalloc(struct_size(*job, ibs, num_ibs),
> +                    amdgpu_in_reset(adev) ? GFP_ATOMIC : GFP_KERNEL);

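That's an extremely bad idea: amdgpu_in_reset() returns true even outside of
the reset thread, so allocations made by other threads while a reset is in
progress would be switched to GFP_ATOMIC as well.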

We really need to look at the pool type.

Regards,
Christian.

>       if (!*job)
>               return -ENOMEM;
>  
> -     af = kzalloc(sizeof(struct amdgpu_fence), GFP_KERNEL);
> +     af = kzalloc(sizeof(struct amdgpu_fence),
> +                  amdgpu_in_reset(adev) ? GFP_ATOMIC : GFP_KERNEL);
>       if (!af) {
>               r = -ENOMEM;
>               goto err_job;
>       }
>       (*job)->hw_fence = af;
>  
> -     af = kzalloc(sizeof(struct amdgpu_fence), GFP_KERNEL);
> +     af = kzalloc(sizeof(struct amdgpu_fence),
> +                  amdgpu_in_reset(adev) ? GFP_ATOMIC : GFP_KERNEL);
>       if (!af) {
>               r = -ENOMEM;
>               goto err_fence;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> index 912c9afaf9e11..7ee0cc46b4608 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> @@ -339,7 +339,8 @@ void amdgpu_sa_bo_manager_fini(struct amdgpu_device *adev,
>                                     struct amdgpu_sa_manager *sa_manager);
>  int amdgpu_sa_bo_manager_start(struct amdgpu_device *adev,
>                                     struct amdgpu_sa_manager *sa_manager);
> -int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
> +int amdgpu_sa_bo_new(struct amdgpu_device *adev,
> +                  struct amdgpu_sa_manager *sa_manager,
>                    struct drm_suballoc **sa_bo,
>                    unsigned int size);
>  void amdgpu_sa_bo_free(struct drm_suballoc **sa_bo,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
> index 39070b2a4c04f..fc13969f8ef49 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
> @@ -76,12 +76,14 @@ void amdgpu_sa_bo_manager_fini(struct amdgpu_device *adev,
>       amdgpu_bo_free_kernel(&sa_manager->bo, &sa_manager->gpu_addr, &sa_manager->cpu_ptr);
>  }
>  
> -int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
> +int amdgpu_sa_bo_new(struct amdgpu_device *adev,
> +                  struct amdgpu_sa_manager *sa_manager,
>                    struct drm_suballoc **sa_bo,
>                    unsigned int size)
>  {
>       struct drm_suballoc *sa = drm_suballoc_new(&sa_manager->base, size,
> -                                                GFP_KERNEL, false, 0);
> +                                                amdgpu_in_reset(adev) ? GFP_ATOMIC : GFP_KERNEL,
> +                                                false, 0);
>  
>       if (IS_ERR(sa)) {
>               *sa_bo = NULL;

Reply via email to