Re: [PATCH 2/2] drm/sched: Rework HW fence processing.

2018-12-07 Thread Christian König

On 07.12.18 at 04:18, Zhou, David(ChunMing) wrote:

-----Original Message-----
From: dri-devel On Behalf Of Andrey Grodzovsky
Sent: Friday, December 07, 2018 1:41 AM
To: dri-de...@lists.freedesktop.org; amd-gfx@lists.freedesktop.org; ckoenig.leichtzumer...@gmail.com; e...@anholt.net; etna...@lists.freedesktop.org
Cc: Liu, Monk
Subject: [PATCH 2/2] drm/sched: Rework HW fence processing.

Expedite job deletion from the ring mirror list to the HW fence signal
callback instead of from finish_work. Together with waiting for all such
fences to signal in drm_sched_stop, this guarantees that an already signaled
job will not be processed twice.
Remove the sched finish fence callback and just submit finish_work directly
from the HW fence callback.

Suggested-by: Christian Koenig 
Signed-off-by: Andrey Grodzovsky 
---
 drivers/gpu/drm/scheduler/sched_fence.c |  4 +++-
 drivers/gpu/drm/scheduler/sched_main.c  | 39 -----
 include/drm/gpu_scheduler.h             | 10 +++--
 3 files changed, 30 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c
index d8d2dff..e62c239 100644
--- a/drivers/gpu/drm/scheduler/sched_fence.c
+++ b/drivers/gpu/drm/scheduler/sched_fence.c
@@ -151,7 +151,8 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f)
 EXPORT_SYMBOL(to_drm_sched_fence);
 
 struct drm_sched_fence *drm_sched_fence_create(struct drm_sched_entity *entity,
-					       void *owner)
+					       void *owner,
+					       struct drm_sched_job *s_job)
 {
 	struct drm_sched_fence *fence = NULL;
 	unsigned seq;
@@ -163,6 +164,7 @@ struct drm_sched_fence *drm_sched_fence_create(struct drm_sched_entity *entity,
 	fence->owner = owner;
 	fence->sched = entity->rq->sched;
 	spin_lock_init(&fence->lock);
+	fence->s_job = s_job;
 
 	seq = atomic_inc_return(&entity->fence_seq);
 	dma_fence_init(&fence->scheduled, &drm_sched_fence_ops_scheduled,
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 8fb7f86..2860037 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -284,31 +284,17 @@ static void drm_sched_job_finish(struct work_struct *work)
 	cancel_delayed_work_sync(&sched->work_tdr);
 
 	spin_lock_irqsave(&sched->job_list_lock, flags);
-	/* remove job from ring_mirror_list */
-	list_del_init(&s_job->node);
-	/* queue TDR for next job */
 	drm_sched_start_timeout(sched);
 	spin_unlock_irqrestore(&sched->job_list_lock, flags);
 
 	sched->ops->free_job(s_job);
 }
 
-static void drm_sched_job_finish_cb(struct dma_fence *f,
-				    struct dma_fence_cb *cb)
-{
-	struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
-						 finish_cb);
-	schedule_work(&job->finish_work);
-}
-
 static void drm_sched_job_begin(struct drm_sched_job *s_job)
 {
 	struct drm_gpu_scheduler *sched = s_job->sched;
 	unsigned long flags;
 
-	dma_fence_add_callback(&s_job->s_fence->finished, &s_job->finish_cb,
-			       drm_sched_job_finish_cb);
-
 	spin_lock_irqsave(&sched->job_list_lock, flags);
 	list_add_tail(&s_job->node, &sched->ring_mirror_list);
 	drm_sched_start_timeout(sched);
@@ -418,13 +404,17 @@ void drm_sched_start(struct drm_gpu_scheduler *sched, bool unpark_only)
 {
 	struct drm_sched_job *s_job, *tmp;
 	bool found_guilty = false;
-	unsigned long flags;
 	int r;
 
 	if (unpark_only)
 		goto unpark;
 
-	spin_lock_irqsave(&sched->job_list_lock, flags);
+	/*
+	 * Locking the list is not required here as the sched thread is parked
+	 * so no new jobs are being pushed in to HW and in drm_sched_stop we
+	 * flushed any in flight jobs who didn't signal yet. Also concurrent
+	 * GPU recovers can't run in parallel.
+	 */
 	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
 		struct drm_sched_fence *s_fence = s_job->s_fence;
 		struct dma_fence *fence;
@@ -453,7 +443,6 @@ void drm_sched_start(struct drm_gpu_scheduler *sched, bool unpark_only)
 	}
 
 	drm_sched_start_timeout(sched);
-	spin_unlock_irqrestore(&sched->job_list_lock, flags);
 
 unpark:
 	kthread_unpark(sched->thread);
@@ -505,7 +494,7 @@ int drm_sched_job_init(struct drm_sched_job *job,
 	job->sched = sched;
 	job->entity = entity;
 	job->s_priority = entity->rq - sched->sched_rq;
-	job->s_fence = drm_sched_fence_create(entity, owner);
+	job->s_fence = drm_sched_fence_create(entity, owner, job);
 	if (!job->s_fence)
 		return -ENOMEM;
 	job->id = 
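
The hunk for the HW fence callback itself, where the early deletion actually
happens, is not visible above. As a minimal sketch of the shape the commit
message describes, assuming drm_sched_process_job remains the HW fence
callback as in the scheduler of this period and using the new fence->s_job
back-pointer; the bookkeeping around the deletion is an assumption, not the
actual hunk:

/*
 * Sketch only, not the actual hunk: unlink the job from the ring mirror
 * list as soon as its HW fence signals, and queue finish_work directly,
 * replacing the removed drm_sched_job_finish_cb().
 */
static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
{
	struct drm_sched_fence *s_fence =
		container_of(cb, struct drm_sched_fence, cb);
	struct drm_sched_job *s_job = s_fence->s_job;	/* new back-pointer */
	struct drm_gpu_scheduler *sched = s_fence->sched;
	unsigned long flags;

	/*
	 * Deleting here instead of in finish_work means drm_sched_stop(),
	 * which waits for these fences, can no longer find an already
	 * signaled job on the mirror list and process it twice.
	 */
	spin_lock_irqsave(&sched->job_list_lock, flags);
	list_del_init(&s_job->node);
	spin_unlock_irqrestore(&sched->job_list_lock, flags);

	atomic_dec(&sched->hw_rq_count);
	drm_sched_fence_finished(s_fence);
	wake_up_interruptible(&sched->wake_up_worker);

	schedule_work(&s_job->finish_work);
}

The design point is that the mirror list then only ever holds jobs whose HW
fence has not signaled yet, which is what lets drm_sched_start() walk it
without taking job_list_lock once the scheduler thread is parked.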

Re: [PATCH 2/2] drm/sched: Rework HW fence processing.

2018-12-06 Thread Grodzovsky, Andrey

On 12/06/2018 12:41 PM, Andrey Grodzovsky wrote:
> [SNIP]
> -	spin_lock_irqsave(&sched->job_list_lock, flags);
> +	/*
> +	 * Locking the list is not required here as the sched thread is parked
> +	 * so no new jobs are being pushed in to HW and in drm_sched_stop we
> +	 * flushed any in flight jobs who didn't signal yet. Also concurrent

The comment is inaccurate here - it's supposed to be 'any in flight jobs
which already have their sched finished fence signaled, and they are removed
from the mirror ring list at that point already anyway'. I will fix this
text later together with the other comments received on the patches.

Andrey
> +	 * GPU recovers can't run in parallel.
> +	 */
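
The include/drm/gpu_scheduler.h hunk (10 +++--) is likewise not visible
above. As a sketch of what the visible callers imply for the header: the
widened drm_sched_fence_create() prototype and the s_job member follow
directly from the diff, while the surrounding struct layout is an assumption
based on the scheduler headers of this period:

struct drm_sched_fence {
	struct dma_fence		scheduled;
	struct dma_fence		finished;
	struct dma_fence_cb		cb;
	struct dma_fence		*parent;
	struct drm_gpu_scheduler	*sched;
	spinlock_t			lock;
	void				*owner;
	/* New: lets the HW fence callback find its job. */
	struct drm_sched_job		*s_job;
};

struct drm_sched_fence *drm_sched_fence_create(struct drm_sched_entity *entity,
					       void *owner,
					       struct drm_sched_job *s_job);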