On 2/4/26 11:11 AM, Lizhi Hou wrote:
Currently, amdxdna_pm_resume_get() is called during job creation, and amdxdna_pm_suspend_put() is called when the hardware notifies job completion. If a job is canceled before it is run, no hardware completion notification is generated, resulting in an unbalanced runtime PM resume/suspend pair.Fix this by moving amdxdna_pm_resume_get() to the job run path, ensuring runtime PM is only resumed for jobs that are actually executed. Fixes: 063db451832b ("accel/amdxdna: Enhance runtime power management") Signed-off-by: Lizhi Hou <[email protected]>
Reviewed-by: Mario Limonciello (AMD) <[email protected]>
--- drivers/accel/amdxdna/aie2_ctx.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c index fe8f9783a73c..37d05f2e986f 100644 --- a/drivers/accel/amdxdna/aie2_ctx.c +++ b/drivers/accel/amdxdna/aie2_ctx.c @@ -306,6 +306,10 @@ aie2_sched_job_run(struct drm_sched_job *sched_job) kref_get(&job->refcnt); fence = dma_fence_get(job->fence);+ ret = amdxdna_pm_resume_get(hwctx->client->xdna);+ if (ret) + goto out; + if (job->drv_cmd) { switch (job->drv_cmd->opcode) { case SYNC_DEBUG_BO: @@ -332,6 +336,7 @@ aie2_sched_job_run(struct drm_sched_job *sched_job)out:if (ret) { + amdxdna_pm_suspend_put(hwctx->client->xdna); dma_fence_put(job->fence); aie2_job_put(job); mmput(job->mm); @@ -988,15 +993,11 @@ int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, goto free_chain; }- ret = amdxdna_pm_resume_get(xdna);- if (ret) - goto cleanup_job; - retry: ret = drm_gem_lock_reservations(job->bos, job->bo_cnt, &acquire_ctx); if (ret) { XDNA_WARN(xdna, "Failed to lock BOs, ret %d", ret); - goto suspend_put; + goto cleanup_job; }for (i = 0; i < job->bo_cnt; i++) {@@ -1004,7 +1005,7 @@ int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, if (ret) { XDNA_WARN(xdna, "Failed to reserve fences %d", ret); drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx); - goto suspend_put; + goto cleanup_job; } }@@ -1019,12 +1020,12 @@ int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); } else if (time_after(jiffies, timeout)) { ret = -ETIME; - goto suspend_put; + goto cleanup_job; }ret = aie2_populate_range(abo);if (ret) - goto suspend_put; + goto cleanup_job; goto retry; } } @@ -1050,8 +1051,6 @@ int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,return 0; -suspend_put:- amdxdna_pm_suspend_put(xdna); cleanup_job: drm_sched_job_cleanup(&job->base); free_chain:
