On 10/31/25 18:43, Alex Deucher wrote:
> If we don't end up initializing the fences, free them when
> we free the job. We can't set the hw_fence to NULL after
> emitting it because we need it in the cleanup path for the
> direct submission case.
>
> v2: take a reference to the fences if we emit them
> v3: handle non-job fence in error paths
>
> Fixes: db36632ea51e ("drm/amdgpu: clean up and unify hw fence handling")
> Reviewed-by: Jesse Zhang <[email protected]> (v1)
> Signed-off-by: Alex Deucher <[email protected]>
Reviewed-by: Christian König <[email protected]>
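
To spell out the ownership rule this establishes for later readers: a fence
whose base.ops is still NULL was never emitted and is just a plain allocation,
while an emitted fence is reference counted and the job holds its own
reference. Roughly (a sketch of my reading of the patch, not the actual code;
the allocation site where ops is still NULL is outside this diff, so treat
that part as an assumption):

	/* emission (amdgpu_ib_schedule() / amdgpu_vm_flush()): the fence is
	 * initialized and the job takes an extra reference, so it stays
	 * valid even after the caller drops *f */
	*f = &af->base;
	if (job)
		dma_fence_get(*f);

	/* teardown (amdgpu_job_free() / amdgpu_job_free_cb()): */
	if (job->hw_fence->base.ops)
		dma_fence_put(&job->hw_fence->base);	/* emitted: drop the job's reference */
	else
		kfree(job->hw_fence);			/* never emitted: plain kfree() */
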
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 19 +++++++++++++++----
> drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 18 ++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 ++
> 3 files changed, 35 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index 39229ece83f83..586a58facca10 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -176,18 +176,21 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
>
> if (!ring->sched.ready) {
> dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", ring->name);
> - return -EINVAL;
> + r = -EINVAL;
> + goto free_fence;
> }
>
> if (vm && !job->vmid) {
> dev_err(adev->dev, "VM IB without ID\n");
> - return -EINVAL;
> + r = -EINVAL;
> + goto free_fence;
> }
>
> if ((ib->flags & AMDGPU_IB_FLAGS_SECURE) &&
> (!ring->funcs->secure_submission_supported)) {
> dev_err(adev->dev, "secure submissions not supported on ring <%s>\n", ring->name);
> - return -EINVAL;
> + r = -EINVAL;
> + goto free_fence;
> }
>
> alloc_size = ring->funcs->emit_frame_size + num_ibs *
> @@ -196,7 +199,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
> r = amdgpu_ring_alloc(ring, alloc_size);
> if (r) {
> dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
> - return r;
> + goto free_fence;
> }
>
> need_ctx_switch = ring->current_ctx != fence_ctx;
> @@ -302,6 +305,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
> return r;
> }
> *f = &af->base;
> + /* get a ref for the job */
> + if (job)
> + dma_fence_get(*f);
>
> if (ring->funcs->insert_end)
> ring->funcs->insert_end(ring);
> @@ -328,6 +334,11 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
> amdgpu_ring_commit(ring);
>
> return 0;
> +
> +free_fence:
> + if (!job)
> + kfree(af);
> + return r;
> }
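
The error path is consistent with that rule (again my reading; the
job == NULL allocation of 'af' is not visible in this hunk, so that part
is an assumption):

	free_fence:
		if (!job)
			kfree(af);	/* direct submit: 'af' was allocated locally
					 * and never emitted, so no dma_fence_put() */
		/* with a job, 'af' is job->hw_fence and still has base.ops == NULL,
		 * so amdgpu_job_free()/amdgpu_job_free_cb() will kfree() it when
		 * the job itself is torn down */
		return r;
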
>
> /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> index fd6aade7ee9e3..efa3281145f6c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> @@ -293,6 +293,15 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
>
> amdgpu_sync_free(&job->explicit_sync);
>
> + if (job->hw_fence->base.ops)
> + dma_fence_put(&job->hw_fence->base);
> + else
> + kfree(job->hw_fence);
> + if (job->hw_vm_fence->base.ops)
> + dma_fence_put(&job->hw_vm_fence->base);
> + else
> + kfree(job->hw_vm_fence);
> +
> kfree(job);
> }
>
> @@ -322,6 +331,15 @@ void amdgpu_job_free(struct amdgpu_job *job)
> if (job->gang_submit != &job->base.s_fence->scheduled)
> dma_fence_put(job->gang_submit);
>
> + if (job->hw_fence->base.ops)
> + dma_fence_put(&job->hw_fence->base);
> + else
> + kfree(job->hw_fence);
> + if (job->hw_vm_fence->base.ops)
> + dma_fence_put(&job->hw_vm_fence->base);
> + else
> + kfree(job->hw_vm_fence);
> +
> kfree(job);
> }
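
Both free paths now repeat the same put-or-kfree logic for hw_fence and
hw_vm_fence. Not a blocker for this fix, but a small helper would keep the
two call sites from drifting apart, e.g. (name and placement made up, just a
sketch, not part of this patch):

	static void amdgpu_job_put_hw_fence(struct amdgpu_fence *af)
	{
		if (af->base.ops)
			dma_fence_put(&af->base);	/* emitted: drop the job's reference */
		else
			kfree(af);			/* never emitted: plain allocation */
	}

and then amdgpu_job_free() and amdgpu_job_free_cb() would just call it for
both fences.
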
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index c3dfb949a9b87..82e897cd5feac 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -849,6 +849,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
> if (r)
> return r;
> fence = &job->hw_vm_fence->base;
> + /* get a ref for the job */
> + dma_fence_get(fence);
> }
>
> if (vm_flush_needed) {