On 2023-10-05 00:06, Matthew Brost wrote:
> On Thu, Sep 28, 2023 at 12:14:12PM -0400, Luben Tuikov wrote:
>> On 2023-09-19 01:01, Matthew Brost wrote:
>>> Rather than call free_job and run_job in same work item have a dedicated
>>> work item for each. This aligns with the design and intended use of work
>>> queues.
>>>
>>> v2:
>>> - Test for DMA_FENCE_FLAG_TIMESTAMP_BIT before setting
>>> timestamp in free_job() work item (Danilo)
>>> v3:
>>> - Drop forward dec of drm_sched_select_entity (Boris)
>>> - Return in drm_sched_run_job_work if entity NULL (Boris)
>>>
>>> Signed-off-by: Matthew Brost <matthew.br...@intel.com>
>>> ---
>>> drivers/gpu/drm/scheduler/sched_main.c | 290 +++++++++++++++----------
>>> include/drm/gpu_scheduler.h | 8 +-
>>> 2 files changed, 182 insertions(+), 116 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c
>>> b/drivers/gpu/drm/scheduler/sched_main.c
>>> index 588c735f7498..1e21d234fb5c 100644
>>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>>> @@ -213,11 +213,12 @@ void drm_sched_rq_remove_entity(struct drm_sched_rq
>>> *rq,
>>> * drm_sched_rq_select_entity_rr - Select an entity which could provide a
>>> job to run
>>> *
>>> * @rq: scheduler run queue to check.
>>> + * @dequeue: dequeue selected entity
>>
>> Change this to "peek" as indicated below.
>>
>>> *
>>> * Try to find a ready entity, returns NULL if none found.
>>> */
>>> static struct drm_sched_entity *
>>> -drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq)
>>> +drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq, bool dequeue)
>>> {
>>> struct drm_sched_entity *entity;
>>>
>>> @@ -227,8 +228,10 @@ drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq)
>>> if (entity) {
>>> list_for_each_entry_continue(entity, &rq->entities, list) {
>>> if (drm_sched_entity_is_ready(entity)) {
>>> - rq->current_entity = entity;
>>> - reinit_completion(&entity->entity_idle);
>>> + if (dequeue) {
>>> + rq->current_entity = entity;
>>> + reinit_completion(&entity->entity_idle);
>>> + }
>>
>> Please rename "dequeue" or invert its logic, as from this patch it seems that
>> it is hiding (gating out) current behaviour.
>>
>> Ideally, I'd prefer it be inverted, so that current behaviour, i.e. what
>> people
>> are used to the rq_select_entity_*() to do, is default--preserved.
>>
>> Perhaps use "peek" as the name of this new variable, to indicate that
>> we're not setting it to be the current entity.
>>
>> I prefer "peek" to others, as the former tells me "Hey, I'm only
>> peeking at the rq and not really doing the default behaviour I've been
>> doing which you're used to." So, probably use "peek". ("Peek" also has
>> historical
>> significance...).
>>
>
> Peek it is. Will change.
>
>>> spin_unlock(&rq->lock);
>>> return entity;
>>> }
>>> @@ -238,8 +241,10 @@ drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq)
>>> list_for_each_entry(entity, &rq->entities, list) {
>>>
>>> if (drm_sched_entity_is_ready(entity)) {
>>> - rq->current_entity = entity;
>>> - reinit_completion(&entity->entity_idle);
>>> + if (dequeue) {
>>
>> if (!peek) {
>>
>
> +1
>
>>> + rq->current_entity = entity;
>>> + reinit_completion(&entity->entity_idle);
>>> + }
>>> spin_unlock(&rq->lock);
>>> return entity;
>>> }
>>> @@ -257,11 +262,12 @@ drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq)
>>> * drm_sched_rq_select_entity_fifo - Select an entity which provides a job
>>> to run
>>> *
>>> * @rq: scheduler run queue to check.
>>> + * @dequeue: dequeue selected entity
>>
>> * @peek: Just find, don't set to current.
>>
>
> +1
>
>>> *
>>> * Find oldest waiting ready entity, returns NULL if none found.> */
>>> static struct drm_sched_entity *
>>> -drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq)
>>> +drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq, bool dequeue)
>>> {
>>> struct rb_node *rb;
>>>
>>> @@ -271,8 +277,10 @@ drm_sched_rq_select_entity_fifo(struct drm_sched_rq
>>> *rq)
>>>
>>> entity = rb_entry(rb, struct drm_sched_entity, rb_tree_node);
>>> if (drm_sched_entity_is_ready(entity)) {
>>> - rq->current_entity = entity;
>>> - reinit_completion(&entity->entity_idle);
>>> + if (dequeue) {
>>
>> if (!peek) {
>>
>>> + rq->current_entity = entity;
>>> + reinit_completion(&entity->entity_idle);
>>> + }
>>> break;
>>> }
>>> }
>>> @@ -282,13 +290,102 @@ drm_sched_rq_select_entity_fifo(struct drm_sched_rq
>>> *rq)
>>> }
>>>
>>> /**
>>> - * drm_sched_submit_queue - scheduler queue submission
>>> + * drm_sched_run_job_queue - queue job submission
>>> + * @sched: scheduler instance
>>> + */
>>
>> Perhaps it would be clearer to a DOC reader if there were verbs
>> in this function comment? I feel this was mentioned in the review
>> to patch 2...
>>
>>> +static void drm_sched_run_job_queue(struct drm_gpu_scheduler *sched)
>>> +{
>>> + if (!READ_ONCE(sched->pause_submit))
>>> + queue_work(sched->submit_wq, &sched->work_run_job);
>>> +}
>>> +
>>> +/**
>>> + * drm_sched_can_queue -- Can we queue more to the hardware?
>>> + * @sched: scheduler instance
>>> + *
>>> + * Return true if we can push more jobs to the hw, otherwise false.
>>> + */
>>> +static bool drm_sched_can_queue(struct drm_gpu_scheduler *sched)
>>> +{
>>> + return atomic_read(&sched->hw_rq_count) <
>>> + sched->hw_submission_limit;
>>> +}
>>> +
>>> +/**
>>> + * drm_sched_select_entity - Select next entity to process
>>> + *
>>> + * @sched: scheduler instance
>>> + * @dequeue: dequeue selected entity
>>
>> When I see "dequeue" I'm thinking "list_del()". Let's
>> use "peek" here as mentioned above.
>>
>>> + *
>>> + * Returns the entity to process or NULL if none are found.
>>> + */
>>> +static struct drm_sched_entity *
>>> +drm_sched_select_entity(struct drm_gpu_scheduler *sched, bool dequeue)
>>
>> drm_sched_select_entity(struct drm_gpu_scheduler *sched, bool peek)
>>
>
> +1
>
>>> +{
>>> + struct drm_sched_entity *entity;
>>> + int i;
>>> +
>>> + if (!drm_sched_can_queue(sched))
>>> + return NULL;
>>> +
>>> + if (sched->single_entity) {
>>> + if (!READ_ONCE(sched->single_entity->stopped) &&
>>> + drm_sched_entity_is_ready(sched->single_entity))
>>> + return sched->single_entity;
>>> +
>>> + return NULL;
>>> + }
>>> +
>>> + /* Kernel run queue has higher priority than normal run queue*/
>>> + for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN;
>>> i--) {
>>> + entity = sched->sched_policy == DRM_SCHED_POLICY_FIFO ?
>>> + drm_sched_rq_select_entity_fifo(&sched->sched_rq[i],
>>> + dequeue) :
>>> + drm_sched_rq_select_entity_rr(&sched->sched_rq[i],
>>> + dequeue);
>>> + if (entity)
>>> + break;
>>> + }
>>> +
>>> + return entity;
>>> +}
>>> +
>>> +/**
>>> + * drm_sched_run_job_queue_if_ready - queue job submission if ready
>>> * @sched: scheduler instance
>>> */
>>> -static void drm_sched_submit_queue(struct drm_gpu_scheduler *sched)
>>> +static void drm_sched_run_job_queue_if_ready(struct drm_gpu_scheduler
>>> *sched)
>>> +{
>>> + if (drm_sched_select_entity(sched, false))
>>> + drm_sched_run_job_queue(sched);
>>> +}
>>> +
>>> +/**
>>> + * drm_sched_free_job_queue - queue free job
>>
>> * drm_sched_free_job_queue - enqueue free-job work
>>
>>> + *
>>> + * @sched: scheduler instance to queue free job
>>
>> * @sched: scheduler instance to queue free job work for
>>
>>
>
> Will change both.
>
>>> + */
>>> +static void drm_sched_free_job_queue(struct drm_gpu_scheduler *sched)
>>> {
>>> if (!READ_ONCE(sched->pause_submit))
>>> - queue_work(sched->submit_wq, &sched->work_submit);
>>> + queue_work(sched->submit_wq, &sched->work_free_job);
>>> +}
>>> +
>>> +/**
>>> + * drm_sched_free_job_queue_if_ready - queue free job if ready
>>
>> * drm_sched_free_job_queue_if_ready - enqueue free-job work if ready
>>
>
> Will change this too.
>
>>> + *
>>> + * @sched: scheduler instance to queue free job
>>> + */
>>> +static void drm_sched_free_job_queue_if_ready(struct drm_gpu_scheduler
>>> *sched)
>>> +{
>>> + struct drm_sched_job *job;
>>> +
>>> + spin_lock(&sched->job_list_lock);
>>> + job = list_first_entry_or_null(&sched->pending_list,
>>> + struct drm_sched_job, list);
>>> + if (job && dma_fence_is_signaled(&job->s_fence->finished))
>>> + drm_sched_free_job_queue(sched);
>>> + spin_unlock(&sched->job_list_lock);
>>> }
>>>
>>> /**
>>> @@ -310,7 +407,7 @@ static void drm_sched_job_done(struct drm_sched_job
>>> *s_job, int result)
>>> dma_fence_get(&s_fence->finished);
>>> drm_sched_fence_finished(s_fence, result);
>>> dma_fence_put(&s_fence->finished);
>>> - drm_sched_submit_queue(sched);
>>> + drm_sched_free_job_queue(sched);
>>> }
>>>
>>> /**
>>> @@ -885,18 +982,6 @@ void drm_sched_job_cleanup(struct drm_sched_job *job)
>>> }
>>> EXPORT_SYMBOL(drm_sched_job_cleanup);
>>>
>>> -/**
>>> - * drm_sched_can_queue -- Can we queue more to the hardware?
>>> - * @sched: scheduler instance
>>> - *
>>> - * Return true if we can push more jobs to the hw, otherwise false.
>>> - */
>>> -static bool drm_sched_can_queue(struct drm_gpu_scheduler *sched)
>>> -{
>>> - return atomic_read(&sched->hw_rq_count) <
>>> - sched->hw_submission_limit;
>>> -}
>>> -
>>> /**
>>> * drm_sched_wakeup_if_can_queue - Wake up the scheduler
>>> * @sched: scheduler instance
>>> @@ -906,43 +991,7 @@ static bool drm_sched_can_queue(struct
>>> drm_gpu_scheduler *sched)
>>> void drm_sched_wakeup_if_can_queue(struct drm_gpu_scheduler *sched)
>>> {
>>> if (drm_sched_can_queue(sched))
>>> - drm_sched_submit_queue(sched);
>>> -}
>>> -
>>> -/**
>>> - * drm_sched_select_entity - Select next entity to process
>>> - *
>>> - * @sched: scheduler instance
>>> - *
>>> - * Returns the entity to process or NULL if none are found.
>>> - */
>>> -static struct drm_sched_entity *
>>> -drm_sched_select_entity(struct drm_gpu_scheduler *sched)
>>> -{
>>> - struct drm_sched_entity *entity;
>>> - int i;
>>> -
>>> - if (!drm_sched_can_queue(sched))
>>> - return NULL;
>>> -
>>> - if (sched->single_entity) {
>>> - if (!READ_ONCE(sched->single_entity->stopped) &&
>>> - drm_sched_entity_is_ready(sched->single_entity))
>>> - return sched->single_entity;
>>> -
>>> - return NULL;
>>> - }
>>> -
>>> - /* Kernel run queue has higher priority than normal run queue*/
>>> - for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN;
>>> i--) {
>>> - entity = sched->sched_policy == DRM_SCHED_POLICY_FIFO ?
>>> - drm_sched_rq_select_entity_fifo(&sched->sched_rq[i]) :
>>> - drm_sched_rq_select_entity_rr(&sched->sched_rq[i]);
>>> - if (entity)
>>> - break;
>>> - }
>>> -
>>> - return entity;
>>> + drm_sched_run_job_queue(sched);
>>> }
>>>
>>> /**
>>> @@ -974,8 +1023,10 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler
>>> *sched)
>>> typeof(*next), list);
>>>
>>> if (next) {
>>> - next->s_fence->scheduled.timestamp =
>>> - job->s_fence->finished.timestamp;
>>> + if (test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT,
>>> + &next->s_fence->scheduled.flags))
>>> + next->s_fence->scheduled.timestamp =
>>> + job->s_fence->finished.timestamp;
>>> /* start TO timer for next job */
>>> drm_sched_start_timeout(sched);
>>> }
>>> @@ -1025,74 +1076,84 @@ drm_sched_pick_best(struct drm_gpu_scheduler
>>> **sched_list,
>>> EXPORT_SYMBOL(drm_sched_pick_best);
>>>
>>> /**
>>> - * drm_sched_main - main scheduler thread
>>> + * drm_sched_free_job_work - worker to call free_job
>>> *
>>> - * @param: scheduler instance
>>> + * @w: free job work
>>> */
>>> -static void drm_sched_main(struct work_struct *w)
>>> +static void drm_sched_free_job_work(struct work_struct *w)
>>> {
>>> struct drm_gpu_scheduler *sched =
>>> - container_of(w, struct drm_gpu_scheduler, work_submit);
>>> - struct drm_sched_entity *entity;
>>> + container_of(w, struct drm_gpu_scheduler, work_free_job);
>>> struct drm_sched_job *cleanup_job;
>>> - int r;
>>>
>>> if (READ_ONCE(sched->pause_submit))
>>> return;
>>>
>>> cleanup_job = drm_sched_get_cleanup_job(sched);
>>> - entity = drm_sched_select_entity(sched);
>>> -
>>> - if (!entity && !cleanup_job)
>>> - return; /* No more work */
>>> -
>>> - if (cleanup_job)
>>> + if (cleanup_job) {
>>> sched->ops->free_job(cleanup_job);
>>>
>>> - if (entity) {
>>> - struct dma_fence *fence;
>>> - struct drm_sched_fence *s_fence;
>>> - struct drm_sched_job *sched_job;
>>> -
>>> - sched_job = drm_sched_entity_pop_job(entity);
>>> - if (!sched_job) {
>>> - complete_all(&entity->entity_idle);
>>> - if (!cleanup_job)
>>> - return; /* No more work */
>>> - goto again;
>>> - }
>>> + drm_sched_free_job_queue_if_ready(sched);
>>> + drm_sched_run_job_queue_if_ready(sched);
>>> + }
>>> +}
>>> +
>>> +/**
>>> + * drm_sched_run_job_work - worker to call run_job
>>> + *
>>> + * @w: run job work
>>> + */
>>> +static void drm_sched_run_job_work(struct work_struct *w)
>>> +{
>>> + struct drm_gpu_scheduler *sched =
>>> + container_of(w, struct drm_gpu_scheduler, work_run_job);
>>> + struct drm_sched_entity *entity;
>>> + struct dma_fence *fence;
>>> + struct drm_sched_fence *s_fence;
>>> + struct drm_sched_job *sched_job;
>>> + int r;
>>>
>>> - s_fence = sched_job->s_fence;
>>> + if (READ_ONCE(sched->pause_submit))
>>> + return;
>>>
>>> - atomic_inc(&sched->hw_rq_count);
>>> - drm_sched_job_begin(sched_job);
>>> + entity = drm_sched_select_entity(sched, true);
>>> + if (!entity)
>>> + return;
>>>
>>> - trace_drm_run_job(sched_job, entity);
>>> - fence = sched->ops->run_job(sched_job);
>>> + sched_job = drm_sched_entity_pop_job(entity);
>>> + if (!sched_job) {
>>> complete_all(&entity->entity_idle);
>>> - drm_sched_fence_scheduled(s_fence, fence);
>>> + return; /* No more work */
>>> + }
>>>
>>> - if (!IS_ERR_OR_NULL(fence)) {
>>> - /* Drop for original kref_init of the fence */
>>> - dma_fence_put(fence);
>>> + s_fence = sched_job->s_fence;
>>>
>>> - r = dma_fence_add_callback(fence, &sched_job->cb,
>>> - drm_sched_job_done_cb);
>>> - if (r == -ENOENT)
>>> - drm_sched_job_done(sched_job, fence->error);
>>> - else if (r)
>>> - DRM_DEV_ERROR(sched->dev, "fence add callback
>>> failed (%d)\n",
>>> - r);
>>> - } else {
>>> - drm_sched_job_done(sched_job, IS_ERR(fence) ?
>>> - PTR_ERR(fence) : 0);
>>> - }
>>> + atomic_inc(&sched->hw_rq_count);
>>> + drm_sched_job_begin(sched_job);
>>> +
>>> + trace_drm_run_job(sched_job, entity);
>>> + fence = sched->ops->run_job(sched_job);
>>> + complete_all(&entity->entity_idle);
>>> + drm_sched_fence_scheduled(s_fence, fence);
>>>
>>> - wake_up(&sched->job_scheduled);
>>> + if (!IS_ERR_OR_NULL(fence)) {
>>> + /* Drop for original kref_init of the fence */
>>> + dma_fence_put(fence);
>>> +
>>> + r = dma_fence_add_callback(fence, &sched_job->cb,
>>> + drm_sched_job_done_cb);
>>> + if (r == -ENOENT)
>>> + drm_sched_job_done(sched_job, fence->error);
>>> + else if (r)
>>> + DRM_DEV_ERROR(sched->dev, "fence add callback failed
>>> (%d)\n",
>>> + r);
>>
>> Please align "r);" to the open brace on the previous line. If you're using
>> Emacs
>> with sane Linux settings, press the "Tab" key anywhere on the line to indent
>> it.
>> (It should run c-indent-line-or-region, usually using leading-tabs-only
>> mode. Pressing
>> it again, over and over, on an already indented line, does nothing. Column
>> indenting--say
>> for columns in 2D/3D/etc., array, usually happens using spaces, which is
>> portable.
>> Also please take an overview with "scrips/checkpatch.pl --strict".)
>>
>
> Will run checkpatch.
>
>> Wrap-around was bumped to 100 in the Linux kernel so you can put the 'r' on
>> the same line without style problems.
>>
>
> Using Vi with wrap around of 80 but know 100 is allowed. Will fix.
Ah, yes--I now see where the real problem is. :-D
--
Regards,
Luben