Re: [PATCH 3/3] drm/amdgpu: Fix deadlock during GPU reset.

2017-10-23 Thread Christian König

Am 20.10.2017 um 15:32 schrieb Andrey Grodzovsky:

Switch from kfifo to SPSC queue.

Bug:
Kfifo is limited in size; during GPU reset it would fill up to its limit
and the pushing thread (producer) would wait for the scheduler worker to
consume the items in the fifo while holding the reservation lock
on a BO. The GPU reset thread, on the other hand, blocks the scheduler
during reset. Before it unblocks the scheduler it might want
to recover VRAM and so will try to reserve the same BO the producer
thread is already holding, creating a deadlock.

Fix:
Switch from kfifo to SPSC queue which is unlimited in size.

Signed-off-by: Andrey Grodzovsky 
---
  drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h |  4 +-
  drivers/gpu/drm/amd/scheduler/gpu_scheduler.c   | 51 ++---
  drivers/gpu/drm/amd/scheduler/gpu_scheduler.h   |  4 +-
  3 files changed, 26 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h 
b/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
index 8bd3810..86838a8 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
+++ b/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
@@ -28,8 +28,8 @@ TRACE_EVENT(amd_sched_job,
   __entry->id = sched_job->id;
   __entry->fence = &sched_job->s_fence->finished;
   __entry->name = sched_job->sched->name;
-  __entry->job_count = kfifo_len(
-  &sched_job->s_entity->job_queue) / 
sizeof(sched_job);
+  __entry->job_count = spsc_queue_count(
+  &sched_job->s_entity->job_queue);
   __entry->hw_job_count = atomic_read(
   &sched_job->sched->hw_rq_count);
   ),
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c 
b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
index 1bbbce2..0c9cdc0 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
@@ -28,9 +28,14 @@
  #include 
  #include "gpu_scheduler.h"
  
+#include "spsc_queue.h"

+
  #define CREATE_TRACE_POINTS
  #include "gpu_sched_trace.h"
  
+#define to_amd_sched_job(sched_job)		\

+   container_of((sched_job), struct amd_sched_job, queue_node)
+
  static bool amd_sched_entity_is_ready(struct amd_sched_entity *entity);
  static void amd_sched_wakeup(struct amd_gpu_scheduler *sched);
  static void amd_sched_process_job(struct dma_fence *f, struct dma_fence_cb 
*cb);
@@ -123,8 +128,6 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
  struct amd_sched_rq *rq,
  uint32_t jobs)
  {
-   int r;
-
if (!(sched && entity && rq))
return -EINVAL;
  
@@ -135,9 +138,7 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
  
  	spin_lock_init(&entity->rq_lock);

spin_lock_init(&entity->queue_lock);
-   r = kfifo_alloc(&entity->job_queue, jobs * sizeof(void *), GFP_KERNEL);
-   if (r)
-   return r;
+   spsc_queue_init(&entity->job_queue);
  
  	atomic_set(&entity->fence_seq, 0);

entity->fence_context = dma_fence_context_alloc(2);
@@ -170,7 +171,7 @@ static bool amd_sched_entity_is_initialized(struct 
amd_gpu_scheduler *sched,
  static bool amd_sched_entity_is_idle(struct amd_sched_entity *entity)
  {
rmb();
-   if (kfifo_is_empty(&entity->job_queue))
+   if (spsc_queue_peek(&entity->job_queue) == NULL)
return true;
  
  	return false;

@@ -185,7 +186,7 @@ static bool amd_sched_entity_is_idle(struct 
amd_sched_entity *entity)
   */
  static bool amd_sched_entity_is_ready(struct amd_sched_entity *entity)
  {
-   if (kfifo_is_empty(&entity->job_queue))
+   if (spsc_queue_peek(&entity->job_queue) == NULL)
return false;
  
  	if (ACCESS_ONCE(entity->dependency))

@@ -227,7 +228,7 @@ void amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
 */
kthread_park(sched->thread);
kthread_unpark(sched->thread);
-   while (kfifo_out(&entity->job_queue, &job, sizeof(job))) {
+   while ((job = 
to_amd_sched_job(spsc_queue_pop(&entity->job_queue {
struct amd_sched_fence *s_fence = job->s_fence;
amd_sched_fence_scheduled(s_fence);
dma_fence_set_error(&s_fence->finished, -ESRCH);
@@ -235,9 +236,7 @@ void amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
dma_fence_put(&s_fence->finished);
sched->ops->free_job(job);
}
-
}
-   kfifo_free(&entity->job_queue);
  }
  
  static void amd_sched_entity_wakeup(struct dma_fence *f, struct dma_fence_cb *cb)

@@ -332,18 +331,21 @@ static bool amd_sched_entity_add_dependency_cb(struct 
amd_sched_entity *entity)
  }
  
  static s

RE: [PATCH 3/3] drm/amdgpu: Fix deadlock during GPU reset.

2017-10-23 Thread Liu, Monk
I read the source again; it looks like I misunderstood your point, so let me 
clarify it again:

1, the cs_submit deadlock can be resolved by moving ttm_eu_fence_buffer_objects()
ahead of push_job()
2, in the GEM_VA IOCTL, the reservation lock is taken at the very beginning, and 
vm_update_directories() needs that lock held until all related jobs are submitted 
to the VM entity, so if the VM entity is full the deadlock could hit, and we 
cannot move "eu_backoff_reservation" to before push_job() under job_submit() due 
to concurrency concerns
3, for TTM moves, e.g. ttm_bo_moves(), it will take the reservation lock on the BO 
from the swap_lru list and then call into the amdgpu TTM part, so the deadlock 
could also hit in job_submit() when the KFIFO is full.

You guys are right, the second and third deadlocks are still there.

BR Monk

-Original Message-
From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of Liu, 
Monk
Sent: 2017年10月23日 15:39
To: Koenig, Christian ; Grodzovsky, Andrey 
; amd-gfx@lists.freedesktop.org
Subject: RE: [PATCH 3/3] drm/amdgpu: Fix deadlock during GPU reset.

Let's make it clear enough:
> That won't work. All VM updates must be completed while the reservation 
> object lock is still held, otherwise you run into possible concurrent VM 
> updates.

All VM updates will be represented as fences which will be hooked into the sync 
object, and after we push the job to the scheduler, we immediately call 
ttm_eu_fence_buffer_objects(). So that means the reservation object lock is 
*not* held anymore, right? But keep in mind that at this point the scheduler may 
not even have begun to work, so I don't believe this sentence: "All VM updates 
must be completed while the reservation object lock is still held".

> No, TTM callbacks into our driver to do the move. So we don't have an 
> opportunity to drop the reservation lock.

For TTM callbacks, can you help illustrate how the deadlock is triggered?


BR Monk


-Original Message-
From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com]
Sent: 2017年10月23日 15:31
To: Liu, Monk ; Koenig, Christian ; 
Grodzovsky, Andrey ; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 3/3] drm/amdgpu: Fix deadlock during GPU reset.

> For VM updates, the situation is same as commands submission, deadlock 
> can also be solved by moving ttm_eu_fence_buffer_objects() ahead of
> push_job()
That won't work. All VM updates must be completed while the reservation object 
lock is still held, otherwise you run into possible concurrent VM updates.

Keep in mind that you can have multiple context per VM, so that isn't covered 
by the per context lock.

> For TTM buffers moves can we pardon gpu_reset routine and wait till the TTM 
> moves complete ?
No, TTM calls back into our driver to do the move, so we don't have an 
opportunity to drop the reservation lock.

In addition to that, kfifo has some other drawbacks as well. For example, we 
waste around 20k of memory for each context for multimedia ring buffers which 
are mostly unused.

Regards,
Christian.

Am 23.10.2017 um 09:24 schrieb Liu, Monk:
> For VM updates, the situation is same as commands submission, deadlock 
> can also be solved by moving ttm_eu_fence_buffer_objects() ahead of 
> push_job() For TTM buffers moves can we pardon gpu_reset routine and wait 
> till the TTM moves complete ?
>
> -Original Message-
> From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com]
> Sent: 2017年10月23日 15:09
> To: Liu, Monk ; Grodzovsky, Andrey 
> ; amd-gfx@lists.freedesktop.org
> Cc: Koenig, Christian 
> Subject: Re: [PATCH 3/3] drm/amdgpu: Fix deadlock during GPU reset.
>
> We discussed that as well, problem is that this won't be sufficient.
>
> We push to the kfifo not only during command submission, but also for VM 
> updates and TTM buffers moves.
>
> So we can still deadlock because of them.
>
> Regards,
> Christian.
>
> Am 23.10.2017 um 05:03 schrieb Liu, Monk:
>> Why not use a more simple way ?
>>
>> Like moving ttm_eu_fence_buffer_objects() to before 
>> amd_sched_entity_push_job() ?
>> That could solve the deadlock from your description
>>
>> And the push order is already guaranteed by context->mutex (which is 
>> also a patch from you)
>>
>>
>> BR Monk
>>
>> -Original Message-
>> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On 
>> Behalf Of Andrey Grodzovsky
>> Sent: 2017年10月20日 21:32
>> To: amd-gfx@lists.freedesktop.org
>> Cc: Grodzovsky, Andrey ; Koenig, Christian 
>> 
>> Subject: [PATCH 3/3] drm/amdgpu: Fix deadlock during GPU reset.
>>
>> Switch from kfifo to SPSC queue.
>>
>> Bug:
>> Kfifo is limited at size, during GPU reset it would fill up to limit and the 
>> pushing thread (producer) would wait for the scheduler

RE: [PATCH 3/3] drm/amdgpu: Fix deadlock during GPU reset.

2017-10-23 Thread Liu, Monk
Let's make it clear enough:
> That won't work. All VM updates must be completed while the reservation 
> object lock is still held, otherwise you run into possible concurrent VM 
> updates.

All VM updates will be represented as fences which will be hooked into the sync 
object, and after we push the job to the scheduler, we immediately call 
ttm_eu_fence_buffer_objects().
So that means the reservation object lock is *not* held anymore, right? But 
keep in mind that at this point the scheduler may not even have begun to work, 
so I don't believe this sentence: 
"All VM updates must be completed while the reservation object 
lock is still held".

> No, TTM callbacks into our driver to do the move. So we don't have an 
> opportunity to drop the reservation lock.

For TTM callbacks, can you help illustrate how the deadlock is triggered?


BR Monk


-Original Message-
From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com] 
Sent: 2017年10月23日 15:31
To: Liu, Monk ; Koenig, Christian ; 
Grodzovsky, Andrey ; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 3/3] drm/amdgpu: Fix deadlock during GPU reset.

> For VM updates, the situation is same as commands submission, deadlock 
> can also be solved by moving ttm_eu_fence_buffer_objects() ahead of 
> push_job()
That won't work. All VM updates must be completed while the reservation object 
lock is still held, otherwise you run into possible concurrent VM updates.

Keep in mind that you can have multiple context per VM, so that isn't covered 
by the per context lock.

> For TTM buffers moves can we pardon gpu_reset routine and wait till the TTM 
> moves complete ?
No, TTM calls back into our driver to do the move, so we don't have an 
opportunity to drop the reservation lock.

In addition to that, kfifo has some other drawbacks as well. For example, we 
waste around 20k of memory for each context for multimedia ring buffers which 
are mostly unused.

Regards,
Christian.

Am 23.10.2017 um 09:24 schrieb Liu, Monk:
> For VM updates, the situation is same as commands submission, deadlock 
> can also be solved by moving ttm_eu_fence_buffer_objects() ahead of 
> push_job() For TTM buffers moves can we pardon gpu_reset routine and wait 
> till the TTM moves complete ?
>
> -Original Message-
> From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com]
> Sent: 2017年10月23日 15:09
> To: Liu, Monk ; Grodzovsky, Andrey 
> ; amd-gfx@lists.freedesktop.org
> Cc: Koenig, Christian 
> Subject: Re: [PATCH 3/3] drm/amdgpu: Fix deadlock during GPU reset.
>
> We discussed that as well, problem is that this won't be sufficient.
>
> We push to the kfifo not only during command submission, but also for VM 
> updates and TTM buffers moves.
>
> So we can still deadlock because of them.
>
> Regards,
> Christian.
>
> Am 23.10.2017 um 05:03 schrieb Liu, Monk:
>> Why not use a more simple way ?
>>
>> Like moving ttm_eu_fence_buffer_objects() to before 
>> amd_sched_entity_push_job() ?
>> That could solve the deadlock from your description
>>
>> And the push order is already guaranteed by context->mutex (which is 
>> also a patch from you)
>>
>>
>> BR Monk
>>
>> -Original Message-
>> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On 
>> Behalf Of Andrey Grodzovsky
>> Sent: 2017年10月20日 21:32
>> To: amd-gfx@lists.freedesktop.org
>> Cc: Grodzovsky, Andrey ; Koenig, Christian 
>> 
>> Subject: [PATCH 3/3] drm/amdgpu: Fix deadlock during GPU reset.
>>
>> Switch from kfifo to SPSC queue.
>>
>> Bug:
>> Kfifo is limited at size, during GPU reset it would fill up to limit and the 
>> pushing thread (producer) would wait for the scheduler worker to consume the 
>> items in the fifo while holding reservation lock on a BO. The gpu reset 
>> thread on the other hand blocks the scheduler during reset. Before it 
>> unblocks the sceduler it might want to recover VRAM and so will try to 
>> reserve the same BO the producer thread is already holding creating a 
>> deadlock.
>>
>> Fix:
>> Switch from kfifo to SPSC queue which is unlimited in size.
>>
>> Signed-off-by: Andrey Grodzovsky 
>> ---
>>drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h |  4 +-
>>drivers/gpu/drm/amd/scheduler/gpu_scheduler.c   | 51 
>> ++---
>>drivers/gpu/drm/amd/scheduler/gpu_scheduler.h   |  4 +-
>>3 files changed, 26 insertions(+), 33 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
>> b/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
>> index 8bd3810..86838a8 100644
>> --- a/drivers/gpu/drm/amd/scheduler/gpu_sched

Re: [PATCH 3/3] drm/amdgpu: Fix deadlock during GPU reset.

2017-10-23 Thread Christian König

For VM updates, the situation is same as commands submission, deadlock can also 
be solved by moving ttm_eu_fence_buffer_objects() ahead of push_job()
That won't work. All VM updates must be completed while the reservation 
object lock is still held, otherwise you run into possible concurrent VM 
updates.


Keep in mind that you can have multiple context per VM, so that isn't 
covered by the per context lock.



For TTM buffers moves can we pardon gpu_reset routine and wait till the TTM 
moves complete ?
No, TTM calls back into our driver to do the move, so we don't have an 
opportunity to drop the reservation lock.


In addition to that, kfifo has some other drawbacks as well. For example, 
we waste around 20k of memory for each context for multimedia ring 
buffers which are mostly unused.


Regards,
Christian.

Am 23.10.2017 um 09:24 schrieb Liu, Monk:

For VM updates, the situation is same as commands submission, deadlock can also 
be solved by moving ttm_eu_fence_buffer_objects() ahead of push_job()
For TTM buffers moves can we pardon gpu_reset routine and wait till the TTM 
moves complete ?

-Original Message-
From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com]
Sent: 2017年10月23日 15:09
To: Liu, Monk ; Grodzovsky, Andrey 
; amd-gfx@lists.freedesktop.org
Cc: Koenig, Christian 
Subject: Re: [PATCH 3/3] drm/amdgpu: Fix deadlock during GPU reset.

We discussed that as well, problem is that this won't be sufficient.

We push to the kfifo not only during command submission, but also for VM 
updates and TTM buffers moves.

So we can still deadlock because of them.

Regards,
Christian.

Am 23.10.2017 um 05:03 schrieb Liu, Monk:

Why not use a more simple way ?

Like moving ttm_eu_fence_buffer_objects() to before amd_sched_entity_push_job() 
?
That could solve the deadlock from your description

And the push order is already guaranteed by context->mutex (which is
also a patch from you)


BR Monk

-Original Message-
From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf
Of Andrey Grodzovsky
Sent: 2017年10月20日 21:32
To: amd-gfx@lists.freedesktop.org
Cc: Grodzovsky, Andrey ; Koenig, Christian

Subject: [PATCH 3/3] drm/amdgpu: Fix deadlock during GPU reset.

Switch from kfifo to SPSC queue.

Bug:
Kfifo is limited in size; during GPU reset it would fill up to its limit and the 
pushing thread (producer) would wait for the scheduler worker to consume the 
items in the fifo while holding the reservation lock on a BO. The GPU reset 
thread, on the other hand, blocks the scheduler during reset. Before it unblocks 
the scheduler it might want to recover VRAM and so will try to reserve the same 
BO the producer thread is already holding, creating a deadlock.

Fix:
Switch from kfifo to SPSC queue which is unlimited in size.

Signed-off-by: Andrey Grodzovsky 
---
   drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h |  4 +-
   drivers/gpu/drm/amd/scheduler/gpu_scheduler.c   | 51 
++---
   drivers/gpu/drm/amd/scheduler/gpu_scheduler.h   |  4 +-
   3 files changed, 26 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
b/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
index 8bd3810..86838a8 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
+++ b/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
@@ -28,8 +28,8 @@ TRACE_EVENT(amd_sched_job,
   __entry->id = sched_job->id;
   __entry->fence = &sched_job->s_fence->finished;
   __entry->name = sched_job->sched->name;
-  __entry->job_count = kfifo_len(
-  &sched_job->s_entity->job_queue) / 
sizeof(sched_job);
+  __entry->job_count = spsc_queue_count(
+  &sched_job->s_entity->job_queue);
   __entry->hw_job_count = atomic_read(
   &sched_job->sched->hw_rq_count);
   ),
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
index 1bbbce2..0c9cdc0 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
@@ -28,9 +28,14 @@
   #include 
   #include "gpu_scheduler.h"
   
+#include "spsc_queue.h"

+
   #define CREATE_TRACE_POINTS
   #include "gpu_sched_trace.h"
   
+#define to_amd_sched_job(sched_job)		\

+   container_of((sched_job), struct amd_sched_job, queue_node)
+
   static bool amd_sched_entity_is_ready(struct amd_sched_entity *entity);  
static void amd_sched_wakeup(struct amd_gpu_scheduler *sched);  static void 
amd_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb); @@ -123,8 
+128,6 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
  str

RE: [PATCH 3/3] drm/amdgpu: Fix deadlock during GPU reset.

2017-10-23 Thread Liu, Monk
For VM updates, the situation is same as commands submission, deadlock can also 
be solved by moving ttm_eu_fence_buffer_objects() ahead of push_job()
For TTM buffers moves can we pardon gpu_reset routine and wait till the TTM 
moves complete ?

-Original Message-
From: Christian König [mailto:ckoenig.leichtzumer...@gmail.com] 
Sent: 2017年10月23日 15:09
To: Liu, Monk ; Grodzovsky, Andrey 
; amd-gfx@lists.freedesktop.org
Cc: Koenig, Christian 
Subject: Re: [PATCH 3/3] drm/amdgpu: Fix deadlock during GPU reset.

We discussed that as well, problem is that this won't be sufficient.

We push to the kfifo not only during command submission, but also for VM 
updates and TTM buffers moves.

So we can still deadlock because of them.

Regards,
Christian.

Am 23.10.2017 um 05:03 schrieb Liu, Monk:
> Why not use a more simple way ?
>
> Like moving ttm_eu_fence_buffer_objects() to before 
> amd_sched_entity_push_job() ?
> That could solve the deadlock from your description
>
> And the push order is already guaranteed by context->mutex (which is 
> also a patch from you)
>
>
> BR Monk
>
> -Original Message-
> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf 
> Of Andrey Grodzovsky
> Sent: 2017年10月20日 21:32
> To: amd-gfx@lists.freedesktop.org
> Cc: Grodzovsky, Andrey ; Koenig, Christian 
> 
> Subject: [PATCH 3/3] drm/amdgpu: Fix deadlock during GPU reset.
>
> Switch from kfifo to SPSC queue.
>
> Bug:
> Kfifo is limited at size, during GPU reset it would fill up to limit and the 
> pushing thread (producer) would wait for the scheduler worker to consume the 
> items in the fifo while holding reservation lock on a BO. The gpu reset 
> thread on the other hand blocks the scheduler during reset. Before it 
> unblocks the sceduler it might want to recover VRAM and so will try to 
> reserve the same BO the producer thread is already holding creating a 
> deadlock.
>
> Fix:
> Switch from kfifo to SPSC queue which is unlimited in size.
>
> Signed-off-by: Andrey Grodzovsky 
> ---
>   drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h |  4 +-
>   drivers/gpu/drm/amd/scheduler/gpu_scheduler.c   | 51 
> ++---
>   drivers/gpu/drm/amd/scheduler/gpu_scheduler.h   |  4 +-
>   3 files changed, 26 insertions(+), 33 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h 
> b/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
> index 8bd3810..86838a8 100644
> --- a/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
> +++ b/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
> @@ -28,8 +28,8 @@ TRACE_EVENT(amd_sched_job,
>  __entry->id = sched_job->id;
>  __entry->fence = &sched_job->s_fence->finished;
>  __entry->name = sched_job->sched->name;
> -__entry->job_count = kfifo_len(
> -&sched_job->s_entity->job_queue) / 
> sizeof(sched_job);
> +__entry->job_count = spsc_queue_count(
> +&sched_job->s_entity->job_queue);
>  __entry->hw_job_count = atomic_read(
>  &sched_job->sched->hw_rq_count);
>  ),
> diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c 
> b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
> index 1bbbce2..0c9cdc0 100644
> --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
> +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
> @@ -28,9 +28,14 @@
>   #include 
>   #include "gpu_scheduler.h"
>   
> +#include "spsc_queue.h"
> +
>   #define CREATE_TRACE_POINTS
>   #include "gpu_sched_trace.h"
>   
> +#define to_amd_sched_job(sched_job)  \
> + container_of((sched_job), struct amd_sched_job, queue_node)
> +
>   static bool amd_sched_entity_is_ready(struct amd_sched_entity *entity);  
> static void amd_sched_wakeup(struct amd_gpu_scheduler *sched);  static void 
> amd_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb); @@ 
> -123,8 +128,6 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
> struct amd_sched_rq *rq,
> uint32_t jobs)
>   {
> - int r;
> -
>   if (!(sched && entity && rq))
>   return -EINVAL;
>   
> @@ -135,9 +138,7 @@ int amd_sched_entity_init(struct amd_gpu_scheduler 
> *sched,
>   
>   spin_lock_init(&entity->rq_lock);
>   spin_lock_init(&entity->queue_lock);
> - r = kfifo_alloc(&entity->job_queue, jobs * sizeof(void *), GFP_KERNEL);
>

Re: [PATCH 3/3] drm/amdgpu: Fix deadlock during GPU reset.

2017-10-23 Thread Christian König

We discussed that as well; the problem is that this won't be sufficient.

We push to the kfifo not only during command submission, but also for VM 
updates and TTM buffers moves.


So we can still deadlock because of them.

Regards,
Christian.

Am 23.10.2017 um 05:03 schrieb Liu, Monk:

Why not use a more simple way ?

Like moving ttm_eu_fence_buffer_objects() to before amd_sched_entity_push_job() 
?
That could solve the deadlock from your description

And the push order is already guaranteed by context->mutex (which is also a 
patch from you)


BR Monk

-Original Message-
From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of 
Andrey Grodzovsky
Sent: 2017年10月20日 21:32
To: amd-gfx@lists.freedesktop.org
Cc: Grodzovsky, Andrey ; Koenig, Christian 

Subject: [PATCH 3/3] drm/amdgpu: Fix deadlock during GPU reset.

Switch from kfifo to SPSC queue.

Bug:
Kfifo is limited in size; during GPU reset it would fill up to its limit and the 
pushing thread (producer) would wait for the scheduler worker to consume the 
items in the fifo while holding the reservation lock on a BO. The GPU reset 
thread, on the other hand, blocks the scheduler during reset. Before it unblocks 
the scheduler it might want to recover VRAM and so will try to reserve the same 
BO the producer thread is already holding, creating a deadlock.

Fix:
Switch from kfifo to SPSC queue which is unlimited in size.

Signed-off-by: Andrey Grodzovsky 
---
  drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h |  4 +-
  drivers/gpu/drm/amd/scheduler/gpu_scheduler.c   | 51 ++---
  drivers/gpu/drm/amd/scheduler/gpu_scheduler.h   |  4 +-
  3 files changed, 26 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h 
b/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
index 8bd3810..86838a8 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
+++ b/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
@@ -28,8 +28,8 @@ TRACE_EVENT(amd_sched_job,
   __entry->id = sched_job->id;
   __entry->fence = &sched_job->s_fence->finished;
   __entry->name = sched_job->sched->name;
-  __entry->job_count = kfifo_len(
-  &sched_job->s_entity->job_queue) / 
sizeof(sched_job);
+  __entry->job_count = spsc_queue_count(
+  &sched_job->s_entity->job_queue);
   __entry->hw_job_count = atomic_read(
   &sched_job->sched->hw_rq_count);
   ),
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c 
b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
index 1bbbce2..0c9cdc0 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
@@ -28,9 +28,14 @@
  #include 
  #include "gpu_scheduler.h"
  
+#include "spsc_queue.h"

+
  #define CREATE_TRACE_POINTS
  #include "gpu_sched_trace.h"
  
+#define to_amd_sched_job(sched_job)		\

+   container_of((sched_job), struct amd_sched_job, queue_node)
+
  static bool amd_sched_entity_is_ready(struct amd_sched_entity *entity);  
static void amd_sched_wakeup(struct amd_gpu_scheduler *sched);  static void 
amd_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb); @@ -123,8 
+128,6 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
  struct amd_sched_rq *rq,
  uint32_t jobs)
  {
-   int r;
-
if (!(sched && entity && rq))
return -EINVAL;
  
@@ -135,9 +138,7 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
  
  	spin_lock_init(&entity->rq_lock);

spin_lock_init(&entity->queue_lock);
-   r = kfifo_alloc(&entity->job_queue, jobs * sizeof(void *), GFP_KERNEL);
-   if (r)
-   return r;
+   spsc_queue_init(&entity->job_queue);
  
  	atomic_set(&entity->fence_seq, 0);

entity->fence_context = dma_fence_context_alloc(2); @@ -170,7 +171,7 @@ 
static bool amd_sched_entity_is_initialized(struct amd_gpu_scheduler *sched,  
static bool amd_sched_entity_is_idle(struct amd_sched_entity *entity)  {
rmb();
-   if (kfifo_is_empty(&entity->job_queue))
+   if (spsc_queue_peek(&entity->job_queue) == NULL)
return true;
  
  	return false;

@@ -185,7 +186,7 @@ static bool amd_sched_entity_is_idle(struct 
amd_sched_entity *entity)
   */
  static bool amd_sched_entity_is_ready(struct amd_sched_entity *entity)  {
-   if (kfifo_is_empty(&entity->job_queue))
+   if (spsc_queue_peek(&entity->job_queue) == NULL)
return false;
  
  	if (ACCESS_ONCE(entity->dependency))

@@ -227,7 +228,7 @@ void amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
 */
kthread_park(sched->thread);
kthread_unpark(sched->thread);
-   while (kfifo_out(&entity->j

RE: [PATCH 3/3] drm/amdgpu: Fix deadlock during GPU reset.

2017-10-22 Thread Liu, Monk
Why not use a simpler way?

Like moving ttm_eu_fence_buffer_objects() to before amd_sched_entity_push_job()?
That could solve the deadlock from your description.

And the push order is already guaranteed by context->mutex (which is also a 
patch from you)


BR Monk

-Original Message-
From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf Of 
Andrey Grodzovsky
Sent: 2017年10月20日 21:32
To: amd-gfx@lists.freedesktop.org
Cc: Grodzovsky, Andrey ; Koenig, Christian 

Subject: [PATCH 3/3] drm/amdgpu: Fix deadlock during GPU reset.

Switch from kfifo to SPSC queue.

Bug:
Kfifo is limited in size; during GPU reset it would fill up to its limit and the 
pushing thread (producer) would wait for the scheduler worker to consume the 
items in the fifo while holding the reservation lock on a BO. The GPU reset 
thread, on the other hand, blocks the scheduler during reset. Before it unblocks 
the scheduler it might want to recover VRAM and so will try to reserve the same 
BO the producer thread is already holding, creating a deadlock.

Fix:
Switch from kfifo to SPSC queue which is unlimited in size.

Signed-off-by: Andrey Grodzovsky 
---
 drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h |  4 +-
 drivers/gpu/drm/amd/scheduler/gpu_scheduler.c   | 51 ++---
 drivers/gpu/drm/amd/scheduler/gpu_scheduler.h   |  4 +-
 3 files changed, 26 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h 
b/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
index 8bd3810..86838a8 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
+++ b/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
@@ -28,8 +28,8 @@ TRACE_EVENT(amd_sched_job,
   __entry->id = sched_job->id;
   __entry->fence = &sched_job->s_fence->finished;
   __entry->name = sched_job->sched->name;
-  __entry->job_count = kfifo_len(
-  &sched_job->s_entity->job_queue) / 
sizeof(sched_job);
+  __entry->job_count = spsc_queue_count(
+  &sched_job->s_entity->job_queue);
   __entry->hw_job_count = atomic_read(
   &sched_job->sched->hw_rq_count);
   ),
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c 
b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
index 1bbbce2..0c9cdc0 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
@@ -28,9 +28,14 @@
 #include 
 #include "gpu_scheduler.h"
 
+#include "spsc_queue.h"
+
 #define CREATE_TRACE_POINTS
 #include "gpu_sched_trace.h"
 
+#define to_amd_sched_job(sched_job)\
+   container_of((sched_job), struct amd_sched_job, queue_node)
+
 static bool amd_sched_entity_is_ready(struct amd_sched_entity *entity);  
static void amd_sched_wakeup(struct amd_gpu_scheduler *sched);  static void 
amd_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb); @@ -123,8 
+128,6 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
  struct amd_sched_rq *rq,
  uint32_t jobs)
 {
-   int r;
-
if (!(sched && entity && rq))
return -EINVAL;
 
@@ -135,9 +138,7 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
 
spin_lock_init(&entity->rq_lock);
spin_lock_init(&entity->queue_lock);
-   r = kfifo_alloc(&entity->job_queue, jobs * sizeof(void *), GFP_KERNEL);
-   if (r)
-   return r;
+   spsc_queue_init(&entity->job_queue);
 
atomic_set(&entity->fence_seq, 0);
entity->fence_context = dma_fence_context_alloc(2); @@ -170,7 +171,7 @@ 
static bool amd_sched_entity_is_initialized(struct amd_gpu_scheduler *sched,  
static bool amd_sched_entity_is_idle(struct amd_sched_entity *entity)  {
rmb();
-   if (kfifo_is_empty(&entity->job_queue))
+   if (spsc_queue_peek(&entity->job_queue) == NULL)
return true;
 
return false;
@@ -185,7 +186,7 @@ static bool amd_sched_entity_is_idle(struct 
amd_sched_entity *entity)
  */
 static bool amd_sched_entity_is_ready(struct amd_sched_entity *entity)  {
-   if (kfifo_is_empty(&entity->job_queue))
+   if (spsc_queue_peek(&entity->job_queue) == NULL)
return false;
 
if (ACCESS_ONCE(entity->dependency))
@@ -227,7 +228,7 @@ void amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
 */
kthread_park(sched->thread);
kthread_unpark(sched->thread);
-   while (kfifo_out(&entity->job_queue, &job, sizeof(job))) {
+   while ((job = 
to_amd_sched_job(spsc_queue_pop(&entity->job_queue 
+{
struct amd_sched_fence *s_fence = job->s_fence;
amd_sched_fence_scheduled(s_fence);
d