On Fri, Jun 28, 2024 at 04:55:36PM +0200, Boris Brezillon wrote:
> A sync-only job is meant to provide a synchronization point on a
> queue, so we can't return a NULL fence there, we have to add a signal
> operation to the command stream which executes after all other
> previously submitted jobs are done.
> 
> Fixes: de8548813824 ("drm/panthor: Add the scheduler logical block")
> Signed-off-by: Boris Brezillon <boris.brezil...@collabora.com>

Took me a bit longer to read, let's blame Friday.

> ---
>  drivers/gpu/drm/panthor/panthor_sched.c | 41 ++++++++++++++++++++-----
>  include/uapi/drm/panthor_drm.h          |  5 +++
>  2 files changed, 38 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
> index 79ffcbc41d78..951ff7e63ea8 100644
> --- a/drivers/gpu/drm/panthor/panthor_sched.c
> +++ b/drivers/gpu/drm/panthor/panthor_sched.c
> @@ -458,6 +458,16 @@ struct panthor_queue {
>               /** @seqno: Sequence number of the last initialized fence. */
>               atomic64_t seqno;
>  
> +             /**
> +              * @last_fence: Fence of the last submitted job.
> +              *
> +              * We return this fence when we get an empty command stream.
> +              * This way, we are guaranteed that all earlier jobs have completed
> +              * when drm_sched_job::s_fence::finished is signaled, without having
> +              * to feed the CS ring buffer with a dummy job that only signals the fence.
> +              */
> +             struct dma_fence *last_fence;
> +
>               /**
>                * @in_flight_jobs: List containing all in-flight jobs.
>                *
> @@ -829,6 +839,9 @@ static void group_free_queue(struct panthor_group *group, struct panthor_queue *
>       panthor_kernel_bo_destroy(queue->ringbuf);
>       panthor_kernel_bo_destroy(queue->iface.mem);
>  
> +     /* Release the last_fence we were holding, if any. */
> +     dma_fence_put(queue->fence_ctx.last_fence);
> +
>       kfree(queue);
>  }
>  
> @@ -2865,11 +2878,14 @@ queue_run_job(struct drm_sched_job *sched_job)
>       static_assert(sizeof(call_instrs) % 64 == 0,
>                     "call_instrs is not aligned on a cacheline");
>  
> -     /* Stream size is zero, nothing to do => return a NULL fence and let
> -      * drm_sched signal the parent.
> +     /* Stream size is zero, nothing to do except making sure all previously
> +      * submitted jobs are done before we signal the
> +      * drm_sched_job::s_fence::finished fence.
>        */
> -     if (!job->call_info.size)
> -             return NULL;
> +     if (!job->call_info.size) {
> +             job->done_fence = dma_fence_get(queue->fence_ctx.last_fence);
> +             return job->done_fence;

What happens if the last job's done_fence was cancelled or timed out? Is the
sync job's done_fence going to be signalled with the same error?

Now that we're returning a fence here, should the job also be added to
in_flight_jobs?

If you're happy with depending on the previous job's done_fence and not
tracking the sync job in in_flight_jobs, then you can have my

Reviewed-by: Liviu Dudau <liviu.du...@arm.com>

Best regards,
Liviu

> +     }
>  
>       ret = pm_runtime_resume_and_get(ptdev->base.dev);
>       if (drm_WARN_ON(&ptdev->base, ret))
> @@ -2928,6 +2944,10 @@ queue_run_job(struct drm_sched_job *sched_job)
>               }
>       }
>  
> +     /* Update the last fence. */
> +     dma_fence_put(queue->fence_ctx.last_fence);
> +     queue->fence_ctx.last_fence = dma_fence_get(job->done_fence);
> +
>       done_fence = dma_fence_get(job->done_fence);
>  
>  out_unlock:
> @@ -3378,10 +3398,15 @@ panthor_job_create(struct panthor_file *pfile,
>               goto err_put_job;
>       }
>  
> -     job->done_fence = kzalloc(sizeof(*job->done_fence), GFP_KERNEL);
> -     if (!job->done_fence) {
> -             ret = -ENOMEM;
> -             goto err_put_job;
> +     /* Empty command streams don't need a fence; they'll pick the one from
> +      * the previously submitted job.
> +      */
> +     if (job->call_info.size) {
> +             job->done_fence = kzalloc(sizeof(*job->done_fence), GFP_KERNEL);
> +             if (!job->done_fence) {
> +                     ret = -ENOMEM;
> +                     goto err_put_job;
> +             }
>       }
>  
>       ret = drm_sched_job_init(&job->base,
> diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h
> index aaed8e12ad0b..926b1deb1116 100644
> --- a/include/uapi/drm/panthor_drm.h
> +++ b/include/uapi/drm/panthor_drm.h
> @@ -802,6 +802,9 @@ struct drm_panthor_queue_submit {
>        * Must be 64-bit/8-byte aligned (the size of a CS instruction)
>        *
>        * Can be zero if stream_addr is zero too.
> +      *
> +      * When the stream size is zero, the queue submit serves as a
> +      * synchronization point.
>        */
>       __u32 stream_size;
>  
> @@ -822,6 +825,8 @@ struct drm_panthor_queue_submit {
>        * ensure the GPU doesn't get garbage when reading the indirect command
>        * stream buffers. If you want the cache flush to happen
>        * unconditionally, pass a zero here.
> +      *
> +      * Ignored when stream_size is zero.
>        */
>       __u32 latest_flush;
>  
> -- 
> 2.45.0
> 

-- 
====================
| I would like to |
| fix the world,  |
| but they're not |
| giving me the   |
 \ source code!  /
  ---------------
    ¯\_(ツ)_/¯
