On 1/16/26 17:20, Alex Deucher wrote:
> Remove the old direct submit path. This simplifies
> the code.
>
> Signed-off-by: Alex Deucher <[email protected]>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 2 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 106 ++++++++-------------
> drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 5 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 3 +-
> 4 files changed, 45 insertions(+), 71 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> index 877d0df50376a..399a59bf907dd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> @@ -686,7 +686,7 @@ int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev,
> job->vmid = vmid;
> job->num_ibs = 1;
>
> - ret = amdgpu_ib_schedule(ring, 1, ib, job, &f);
> + ret = amdgpu_ib_schedule(ring, job, &f);
>
> if (ret) {
> drm_err(adev_to_drm(adev), "failed to schedule IB.\n");
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index 136e50de712a0..fb58637ed1507 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -103,8 +103,6 @@ void amdgpu_ib_free(struct amdgpu_ib *ib, struct dma_fence *f)
> * amdgpu_ib_schedule - schedule an IB (Indirect Buffer) on the ring
> *
> * @ring: ring index the IB is associated with
> - * @num_ibs: number of IBs to schedule
> - * @ibs: IB objects to schedule
> * @job: job to schedule
> * @f: fence created during this submission
> *
> @@ -121,12 +119,11 @@ void amdgpu_ib_free(struct amdgpu_ib *ib, struct dma_fence *f)
> * a CONST_IB), it will be put on the ring prior to the DE IB. Prior
> * to SI there was just a DE IB.
> */
> -int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
> - struct amdgpu_ib *ibs, struct amdgpu_job *job,
> +int amdgpu_ib_schedule(struct amdgpu_ring *ring, struct amdgpu_job *job,
> struct dma_fence **f)
> {
> struct amdgpu_device *adev = ring->adev;
> - struct amdgpu_ib *ib = &ibs[0];
> + struct amdgpu_ib *ib;
> struct dma_fence *tmp = NULL;
> struct amdgpu_fence *af;
> bool need_ctx_switch;
> @@ -142,64 +139,51 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
> unsigned int i;
> int r = 0;
>
> - if (num_ibs == 0)
> + if (!job)
> + return -EINVAL;
> + if (job->num_ibs == 0)
> return -EINVAL;
>
> - /* ring tests don't use a job */
> - if (job) {
> - vm = job->vm;
> - fence_ctx = job->base.s_fence ?
> - job->base.s_fence->finished.context : 0;
> - shadow_va = job->shadow_va;
> - csa_va = job->csa_va;
> - gds_va = job->gds_va;
> - init_shadow = job->init_shadow;
> - af = job->hw_fence;
> - /* Save the context of the job for reset handling.
> - * The driver needs this so it can skip the ring
> - * contents for guilty contexts.
> - */
> - af->context = fence_ctx;
> - /* the vm fence is also part of the job's context */
> - job->hw_vm_fence->context = fence_ctx;
> - } else {
> - vm = NULL;
> - fence_ctx = 0;
> - shadow_va = 0;
> - csa_va = 0;
> - gds_va = 0;
> - init_shadow = false;
> - af = kzalloc(sizeof(*af), GFP_ATOMIC);
> - if (!af)
> - return -ENOMEM;
> - }
> + ib = &job->ibs[0];
> + vm = job->vm;
> + fence_ctx = job->base.s_fence ?
> + job->base.s_fence->finished.context : 0;
> + shadow_va = job->shadow_va;
> + csa_va = job->csa_va;
> + gds_va = job->gds_va;
> + init_shadow = job->init_shadow;
> + af = job->hw_fence;
I think we could also drop those local variables now (maybe except the
VM). They are only used once or twice anyway.
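Something like this maybe (just a rough sketch, assuming the
amdgpu_ring_emit_gfx_shadow() call further down is the only consumer of
shadow_va/csa_va/gds_va/init_shadow):

	/* sketch only: assumes emit_gfx_shadow is the sole user, so the
	 * values can be read straight from the job instead of locals */
	if (ring->funcs->emit_gfx_shadow)
		amdgpu_ring_emit_gfx_shadow(ring, job->shadow_va, job->csa_va,
					    job->gds_va, job->init_shadow,
					    job->vmid);

fence_ctx is probably still worth keeping as a local since it is used for
both hw fences, the context switch check and ring->current_ctx.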
Apart from that looks good to me,
Christian.
> + /* Save the context of the job for reset handling.
> + * The driver needs this so it can skip the ring
> + * contents for guilty contexts.
> + */
> + af->context = fence_ctx;
> + /* the vm fence is also part of the job's context */
> + job->hw_vm_fence->context = fence_ctx;
>
> if (!ring->sched.ready) {
> dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n",
> ring->name);
> - r = -EINVAL;
> - goto free_fence;
> + return -EINVAL;
> }
>
> if (vm && !job->vmid) {
> dev_err(adev->dev, "VM IB without ID\n");
> - r = -EINVAL;
> - goto free_fence;
> + return -EINVAL;
> }
>
> if ((ib->flags & AMDGPU_IB_FLAGS_SECURE) &&
> (!ring->funcs->secure_submission_supported)) {
> dev_err(adev->dev, "secure submissions not supported on ring <%s>\n", ring->name);
> - r = -EINVAL;
> - goto free_fence;
> + return -EINVAL;
> }
>
> - alloc_size = ring->funcs->emit_frame_size + num_ibs *
> + alloc_size = ring->funcs->emit_frame_size + job->num_ibs *
> ring->funcs->emit_ib_size;
>
> r = amdgpu_ring_alloc(ring, alloc_size);
> if (r) {
> dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
> - goto free_fence;
> + return r;
> }
>
> need_ctx_switch = ring->current_ctx != fence_ctx;
> @@ -225,12 +209,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
> if (ring->funcs->insert_start)
> ring->funcs->insert_start(ring);
>
> - if (job) {
> - r = amdgpu_vm_flush(ring, job, need_pipe_sync);
> - if (r) {
> - amdgpu_ring_undo(ring);
> - return r;
> - }
> + r = amdgpu_vm_flush(ring, job, need_pipe_sync);
> + if (r) {
> + amdgpu_ring_undo(ring);
> + return r;
> }
>
> amdgpu_ring_ib_begin(ring);
> @@ -248,7 +230,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
> if (need_ctx_switch)
> status |= AMDGPU_HAVE_CTX_SWITCH;
>
> - if (job && ring->funcs->emit_cntxcntl) {
> + if (ring->funcs->emit_cntxcntl) {
> status |= job->preamble_status;
> status |= job->preemption_status;
> amdgpu_ring_emit_cntxcntl(ring, status);
> @@ -257,15 +239,15 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
> /* Setup initial TMZiness and send it off.
> */
> secure = false;
> - if (job && ring->funcs->emit_frame_cntl) {
> + if (ring->funcs->emit_frame_cntl) {
> secure = ib->flags & AMDGPU_IB_FLAGS_SECURE;
> amdgpu_ring_emit_frame_cntl(ring, true, secure);
> }
>
> - for (i = 0; i < num_ibs; ++i) {
> - ib = &ibs[i];
> + for (i = 0; i < job->num_ibs; ++i) {
> + ib = &job->ibs[i];
>
> - if (job && ring->funcs->emit_frame_cntl) {
> + if (ring->funcs->emit_frame_cntl) {
> if (secure != !!(ib->flags & AMDGPU_IB_FLAGS_SECURE)) {
> amdgpu_ring_emit_frame_cntl(ring, false,
> secure);
> secure = !secure;
> @@ -277,7 +259,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
> status &= ~AMDGPU_HAVE_CTX_SWITCH;
> }
>
> - if (job && ring->funcs->emit_frame_cntl)
> + if (ring->funcs->emit_frame_cntl)
> amdgpu_ring_emit_frame_cntl(ring, false, secure);
>
> amdgpu_device_invalidate_hdp(adev, ring);
> @@ -286,7 +268,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
> fence_flags |= AMDGPU_FENCE_FLAG_TC_WB_ONLY;
>
> /* wrap the last IB with fence */
> - if (job && job->uf_addr) {
> + if (job->uf_addr) {
> amdgpu_ring_emit_fence(ring, job->uf_addr, job->uf_sequence,
> fence_flags | AMDGPU_FENCE_FLAG_64BIT);
> }
> @@ -299,15 +281,14 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
> r = amdgpu_fence_emit(ring, af, fence_flags);
> if (r) {
> dev_err(adev->dev, "failed to emit fence (%d)\n", r);
> - if (job && job->vmid)
> + if (job->vmid)
> amdgpu_vmid_reset(adev, ring->vm_hub, job->vmid);
> amdgpu_ring_undo(ring);
> - goto free_fence;
> + return r;
> }
> *f = &af->base;
> /* get a ref for the job */
> - if (job)
> - dma_fence_get(*f);
> + dma_fence_get(*f);
>
> if (ring->funcs->insert_end)
> ring->funcs->insert_end(ring);
> @@ -315,7 +296,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
> amdgpu_ring_patch_cond_exec(ring, cond_exec);
>
> ring->current_ctx = fence_ctx;
> - if (job && ring->funcs->emit_switch_buffer)
> + if (ring->funcs->emit_switch_buffer)
> amdgpu_ring_emit_switch_buffer(ring);
>
> if (ring->funcs->emit_wave_limit &&
> @@ -334,11 +315,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
> amdgpu_ring_commit(ring);
>
> return 0;
> -
> -free_fence:
> - if (!job)
> - kfree(af);
> - return r;
> }
>
> /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> index c7e4d79b9f61d..b09ac3dda17a4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> @@ -373,7 +373,7 @@ int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring,
> int r;
>
> job->base.sched = &ring->sched;
> - r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job, fence);
> + r = amdgpu_ib_schedule(ring, job, fence);
>
> if (r)
> return r;
> @@ -443,8 +443,7 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
> dev_dbg(adev->dev, "Skip scheduling IBs in ring(%s)",
> ring->name);
> } else {
> - r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job,
> - &fence);
> + r = amdgpu_ib_schedule(ring, job, &fence);
> if (r)
> dev_err(adev->dev,
> "Error scheduling IBs (%d) in ring(%s)", r,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index cb0fb1a989d2f..86a788d476957 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -569,8 +569,7 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> enum amdgpu_ib_pool_type pool,
> struct amdgpu_ib *ib);
> void amdgpu_ib_free(struct amdgpu_ib *ib, struct dma_fence *f);
> -int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
> - struct amdgpu_ib *ibs, struct amdgpu_job *job,
> +int amdgpu_ib_schedule(struct amdgpu_ring *ring, struct amdgpu_job *job,
> struct dma_fence **f);
> int amdgpu_ib_pool_init(struct amdgpu_device *adev);
> void amdgpu_ib_pool_fini(struct amdgpu_device *adev);