[AMD Official Use Only - AMD Internal Distribution Only]

Reviewed-by: Jesse Zhang <[email protected]>

> -----Original Message-----
> From: amd-gfx <[email protected]> On Behalf Of Alex
> Deucher
> Sent: Wednesday, January 21, 2026 11:01 AM
> To: [email protected]
> Cc: Deucher, Alexander <[email protected]>
> Subject: [PATCH 02/10] drm/amdgpu/job: use GFP_ATOMIC while in gpu reset
>
> If we need to allocate a job during GPU reset, use GFP_ATOMIC rather than
> GFP_KERNEL.
>
> v2: use pool type to determine alloc flags.
>
> Signed-off-by: Alex Deucher <[email protected]>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c |  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c     |  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c     |  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c    | 13 ++++++++-----
>  drivers/gpu/drm/amd/amdgpu/amdgpu_job.h    |  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c     |  5 +++--
>  7 files changed, 16 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> index 877d0df50376a..89df26dd5ada7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> @@ -672,7 +672,7 @@ int amdgpu_amdkfd_submit_ib(struct amdgpu_device
> *adev,
>               goto err;
>       }
>
> -     ret = amdgpu_job_alloc(adev, NULL, NULL, NULL, 1, &job, 0);
> +     ret = amdgpu_job_alloc(adev, NULL, NULL, NULL, 1, &job, 0, false);
>       if (ret)
>               goto err;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index d591dce0f3b3c..4d53d9cb8490d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -282,7 +282,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
>       for (i = 0; i < p->gang_size; ++i) {
>               ret = amdgpu_job_alloc(p->adev, vm, p->entities[i], vm,
>                                      num_ibs[i], &p->jobs[i],
> -                                    p->filp->client_id);
> +                                    p->filp->client_id, false);
>               if (ret)
>                       goto free_all_kdata;
>               switch (p->adev->enforce_isolation[fpriv->xcp_id]) {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index 72ec455fa932c..d90966daf52fc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -69,7 +69,7 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct
> amdgpu_vm *vm,
>
>       if (size) {
>               r = amdgpu_sa_bo_new(&adev->ib_pools[pool_type],
> -                                  &ib->sa_bo, size);
> +                                  &ib->sa_bo, size, pool_type == AMDGPU_IB_POOL_DIRECT);
>               if (r) {
>                       dev_err(adev->dev, "failed to get a new IB (%d)\n", r);
>                       return r;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> index ec8d74db62758..8660e3d1c3088 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> @@ -190,7 +190,7 @@ static enum drm_gpu_sched_stat
> amdgpu_job_timedout(struct drm_sched_job *s_job)
>  int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>                    struct drm_sched_entity *entity, void *owner,
>                    unsigned int num_ibs, struct amdgpu_job **job,
> -                  u64 drm_client_id)
> +                  u64 drm_client_id, bool need_atomic)
>  {
>       struct amdgpu_fence *af;
>       int r;
> @@ -198,18 +198,21 @@ int amdgpu_job_alloc(struct amdgpu_device *adev,
> struct amdgpu_vm *vm,
>       if (num_ibs == 0)
>               return -EINVAL;
>
> -     *job = kzalloc(struct_size(*job, ibs, num_ibs), GFP_KERNEL);
> +     *job = kzalloc(struct_size(*job, ibs, num_ibs),
> +                    need_atomic ? GFP_ATOMIC : GFP_KERNEL);
>       if (!*job)
>               return -ENOMEM;
>
> -     af = kzalloc(sizeof(struct amdgpu_fence), GFP_KERNEL);
> +     af = kzalloc(sizeof(struct amdgpu_fence),
> +                  need_atomic ? GFP_ATOMIC : GFP_KERNEL);
>       if (!af) {
>               r = -ENOMEM;
>               goto err_job;
>       }
>       (*job)->hw_fence = af;
>
> -     af = kzalloc(sizeof(struct amdgpu_fence), GFP_KERNEL);
> +     af = kzalloc(sizeof(struct amdgpu_fence),
> +                  need_atomic ? GFP_ATOMIC : GFP_KERNEL);
>       if (!af) {
>               r = -ENOMEM;
>               goto err_fence;
> @@ -248,7 +251,7 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device
> *adev,
>       int r;
>
>       r = amdgpu_job_alloc(adev, NULL, entity, owner, 1, job,
> -                          k_job_id);
> +                          k_job_id, pool_type == AMDGPU_IB_POOL_DIRECT);
>       if (r)
>               return r;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
> index 56a88e14a0448..9de2cae966fea 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
> @@ -113,7 +113,7 @@ static inline struct amdgpu_ring *amdgpu_job_ring(struct
> amdgpu_job *job)
>  int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>                    struct drm_sched_entity *entity, void *owner,
>                    unsigned int num_ibs, struct amdgpu_job **job,
> -                  u64 drm_client_id);
> +                  u64 drm_client_id, bool need_atomic);
>  int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev,
>                            struct drm_sched_entity *entity, void *owner,
>                            size_t size, enum amdgpu_ib_pool_type pool_type,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> index 912c9afaf9e11..8abff5fdae81d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> @@ -341,7 +341,7 @@ int amdgpu_sa_bo_manager_start(struct amdgpu_device
> *adev,
>                                     struct amdgpu_sa_manager *sa_manager);
>  int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
>                    struct drm_suballoc **sa_bo,
> -                  unsigned int size);
> +                  unsigned int size, bool need_atomic);
>  void amdgpu_sa_bo_free(struct drm_suballoc **sa_bo,
>                      struct dma_fence *fence);
>  #if defined(CONFIG_DEBUG_FS)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
> index 39070b2a4c04f..1d44b95de7e55 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
> @@ -78,10 +78,11 @@ void amdgpu_sa_bo_manager_fini(struct amdgpu_device
> *adev,
>
>  int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
>                    struct drm_suballoc **sa_bo,
> -                  unsigned int size)
> +                  unsigned int size, bool need_atomic)
>  {
>       struct drm_suballoc *sa = drm_suballoc_new(&sa_manager->base, size,
> -                                                GFP_KERNEL, false, 0);
> +                                                need_atomic ? GFP_ATOMIC : GFP_KERNEL,
> +                                                false, 0);
>
>       if (IS_ERR(sa)) {
>               *sa_bo = NULL;
> --
> 2.52.0

Reply via email to