Re: [Intel-gfx] [RFC 7/9] drm/i915: Interrupt driven fences

2015-10-28 Thread John Harrison

On 27/07/2015 12:33, Tvrtko Ursulin wrote:


Hi,

On 07/17/2015 03:31 PM, john.c.harri...@intel.com wrote:

From: John Harrison 

The intended usage model for struct fence is that the signalled 
status should be
set on demand rather than polled. That is, there should not be a need 
for a
'signaled' function to be called every time the status is queried. 
Instead,
'something' should be done to enable a signal callback from the 
hardware which
will update the state directly. In the case of requests, this is the 
seqno
update interrupt. The idea is that this callback will only be enabled 
on demand

when something actually tries to wait on the fence.

This change removes the polling test and replaces it with the 
callback scheme.

Each fence is added to a 'please poke me' list at the start of
i915_add_request(). The interrupt handler then scans through the 
'poke me' list
when a new seqno pops out and signals any matching fence/request. The 
fence is
then removed from the list so the entire request stack does not need 
to be
scanned every time. Note that the fence is added to the list before 
the commands
to generate the seqno interrupt are added to the ring. Thus the 
sequence is

guaranteed to be race free if the interrupt is already enabled.

Note that the interrupt is only enabled on demand (i.e. when 
__wait_request() is
called). Thus there is still a potential race when enabling the 
interrupt as the
request may already have completed. However, this is simply solved by 
calling
the interrupt processing code immediately after enabling the 
interrupt and

thereby checking for already completed requests.

Lastly, the ring clean up code has the possibility to cancel outstanding
requests (e.g. because TDR has reset the ring). These requests will 
never get
signalled and so must be removed from the signal list manually. This 
is done by
setting a 'cancelled' flag and then calling the regular notify/retire 
code path
rather than attempting to duplicate the list manipulation and clean 
up code in
multiple places. This also avoids any race condition where the 
cancellation
request might occur after/during the completion interrupt actually 
arriving.


v2: Updated to take advantage of the request unreference no longer 
requiring the

mutex lock.

For: VIZ-5190
Signed-off-by: John Harrison 
---
  drivers/gpu/drm/i915/i915_drv.h |   8 ++
  drivers/gpu/drm/i915/i915_gem.c | 132 
+---

  drivers/gpu/drm/i915/i915_irq.c |   2 +
  drivers/gpu/drm/i915/intel_lrc.c|   1 +
  drivers/gpu/drm/i915/intel_ringbuffer.c |   1 +
  drivers/gpu/drm/i915/intel_ringbuffer.h |   1 +
  6 files changed, 136 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h 
b/drivers/gpu/drm/i915/i915_drv.h

index 61c3db2..d7f1aa5 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2163,7 +2163,11 @@ void i915_gem_track_fb(struct 
drm_i915_gem_object *old,

  struct drm_i915_gem_request {
  /** Underlying object for implementing the signal/wait stuff. */
  struct fence fence;
+struct list_head signal_list;
+struct list_head unsignal_list;


In addition to what Daniel said (one list_head looks enough) it is 
customary to call it _link.



  struct list_head delay_free_list;
+bool cancelled;
+bool irq_enabled;

  /** On Which ring this request was generated */
  struct drm_i915_private *i915;
@@ -2241,6 +2245,10 @@ int i915_gem_request_alloc(struct 
intel_engine_cs *ring,

 struct drm_i915_gem_request **req_out);
  void i915_gem_request_cancel(struct drm_i915_gem_request *req);

+void i915_gem_request_submit(struct drm_i915_gem_request *req);
+void i915_gem_request_enable_interrupt(struct drm_i915_gem_request 
*req);

+void i915_gem_request_notify(struct intel_engine_cs *ring);
+
  int i915_create_fence_timeline(struct drm_device *dev,
 struct intel_context *ctx,
 struct intel_engine_cs *ring);
diff --git a/drivers/gpu/drm/i915/i915_gem.c 
b/drivers/gpu/drm/i915/i915_gem.c

index 482835a..7c589a9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1222,6 +1222,11 @@ int __i915_wait_request(struct 
drm_i915_gem_request *req,

  if (list_empty(>list))
  return 0;

+/*
+ * Enable interrupt completion of the request.
+ */
+i915_gem_request_enable_interrupt(req);
+
  if (i915_gem_request_completed(req))
  return 0;

@@ -1382,6 +1387,10 @@ static void i915_gem_request_retire(struct 
drm_i915_gem_request *request)

  list_del_init(>list);
  i915_gem_request_remove_from_client(request);

+/* In case the request is still in the signal pending list */
+if (!list_empty(>signal_list))
+request->cancelled = true;
+
  i915_gem_request_unreference(request);
  }

@@ -2534,6 +2543,12 @@ void 

Re: [Intel-gfx] [RFC 7/9] drm/i915: Interrupt driven fences

2015-08-05 Thread Daniel Vetter
On Mon, Aug 03, 2015 at 10:20:29AM +0100, Tvrtko Ursulin wrote:
 
 On 07/27/2015 03:00 PM, Daniel Vetter wrote:
 On Mon, Jul 27, 2015 at 02:20:43PM +0100, Tvrtko Ursulin wrote:
 
 On 07/17/2015 03:31 PM, john.c.harri...@intel.com wrote:
 From: John Harrison john.c.harri...@intel.com
 
 The intended usage model for struct fence is that the signalled status 
 should be
 set on demand rather than polled. That is, there should not be a need for a
 'signaled' function to be called every time the status is queried. Instead,
 'something' should be done to enable a signal callback from the hardware 
 which
 will update the state directly. In the case of requests, this is the seqno
 update interrupt. The idea is that this callback will only be enabled on 
 demand
 when something actually tries to wait on the fence.
 
 This change removes the polling test and replaces it with the callback 
 scheme.
 Each fence is added to a 'please poke me' list at the start of
 i915_add_request(). The interrupt handler then scans through the 'poke me' 
 list
 when a new seqno pops out and signals any matching fence/request. The 
 fence is
 then removed from the list so the entire request stack does not need to be
 scanned every time. Note that the fence is added to the list before the 
 commands
 to generate the seqno interrupt are added to the ring. Thus the sequence is
 guaranteed to be race free if the interrupt is already enabled.
 
 Note that the interrupt is only enabled on demand (i.e. when 
 __wait_request() is
 called). Thus there is still a potential race when enabling the interrupt 
 as the
 request may already have completed. However, this is simply solved by 
 calling
 the interrupt processing code immediately after enabling the interrupt and
 thereby checking for already completed requests.
 
 Lastly, the ring clean up code has the possibility to cancel outstanding
 requests (e.g. because TDR has reset the ring). These requests will never 
 get
 signalled and so must be removed from the signal list manually. This is 
 done by
 setting a 'cancelled' flag and then calling the regular notify/retire code 
 path
 rather than attempting to duplicate the list manipulation and clean up 
 code in
 multiple places. This also avoids any race condition where the cancellation
 request might occur after/during the completion interrupt actually 
 arriving.
 
 v2: Updated to take advantage of the request unreference no longer 
 requiring the
 mutex lock.
 
 For: VIZ-5190
 Signed-off-by: John Harrison john.c.harri...@intel.com
 ---
 
 [snip]
 
 @@ -1382,6 +1387,10 @@ static void i915_gem_request_retire(struct 
 drm_i915_gem_request *request)
list_del_init(request-list);
i915_gem_request_remove_from_client(request);
 
 +  /* In case the request is still in the signal pending list */
 +  if (!list_empty(request-signal_list))
 +  request-cancelled = true;
 +
 
 Another thing I did not see implemented is the sync_fence error state.
 
 This is more about the Android part, but related to this canceled flag so I
 am commenting here.
 
 I thought when TDR kicks in and we set request-cancelled to true, there
 should be a code path which somehow makes sync_fence-status negative.
 
 As it is, because fence_signal will not be called on canceled, I thought
 waiters will wait until timeout, rather than being woken up and return error
 status.
 
 For this to work you would somehow need to make sync_fence-status go
 negative. With normal fence completion it goes from 1 - 0, via the
 completion callback. I did not immediately see how to make it go negative
 using the existing API.
 
 I think back when we did struct fence we decided that we won't care yet
 about forwarding error state since doing that across drivers if you have a
 chain of fences looked complicated. And no one had any clear idea about
 what kind of semantics we really want. If we want this we'd need to add
 it, but probably better to do that as a follow-up (usual caveat about
 open-source userspace and demonstration vehicles apply and all that).
 
 Hm, I am not sure but it feels to me not having an error state is a problem.
 Without it userspace can just keep waiting and waiting upon a fence even
 though the driver has discarded that workload and never plans to resubmit
 it. Am I missing something?

fences still must complete eventually, they simply won't tell you whether
the gpu died before completing the fence or not. Which is in line with gl
spec for fences and arb_robustness - inquiring about gpu hangs is done
through sideband apis.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [RFC 7/9] drm/i915: Interrupt driven fences

2015-08-05 Thread Maarten Lankhorst
Op 05-08-15 om 10:05 schreef Daniel Vetter:
 On Mon, Aug 03, 2015 at 10:20:29AM +0100, Tvrtko Ursulin wrote:
 On 07/27/2015 03:00 PM, Daniel Vetter wrote:
 On Mon, Jul 27, 2015 at 02:20:43PM +0100, Tvrtko Ursulin wrote:
 On 07/17/2015 03:31 PM, john.c.harri...@intel.com wrote:
 From: John Harrison john.c.harri...@intel.com

 The intended usage model for struct fence is that the signalled status 
 should be
 set on demand rather than polled. That is, there should not be a need for 
 a
 'signaled' function to be called every time the status is queried. Instead,
 'something' should be done to enable a signal callback from the hardware 
 which
 will update the state directly. In the case of requests, this is the seqno
 update interrupt. The idea is that this callback will only be enabled on 
 demand
 when something actually tries to wait on the fence.

 This change removes the polling test and replaces it with the callback 
 scheme.
 Each fence is added to a 'please poke me' list at the start of
 i915_add_request(). The interrupt handler then scans through the 'poke 
 me' list
 when a new seqno pops out and signals any matching fence/request. The 
 fence is
 then removed from the list so the entire request stack does not need to be
 scanned every time. Note that the fence is added to the list before the 
 commands
 to generate the seqno interrupt are added to the ring. Thus the sequence 
 is
 guaranteed to be race free if the interrupt is already enabled.

 Note that the interrupt is only enabled on demand (i.e. when 
 __wait_request() is
 called). Thus there is still a potential race when enabling the interrupt 
 as the
 request may already have completed. However, this is simply solved by 
 calling
 the interrupt processing code immediately after enabling the interrupt and
 thereby checking for already completed requests.

 Lastly, the ring clean up code has the possibility to cancel outstanding
 requests (e.g. because TDR has reset the ring). These requests will never 
 get
 signalled and so must be removed from the signal list manually. This is 
 done by
 setting a 'cancelled' flag and then calling the regular notify/retire 
 code path
 rather than attempting to duplicate the list manipulation and clean up 
 code in
 multiple places. This also avoids any race condition where the cancellation
 request might occur after/during the completion interrupt actually 
 arriving.

 v2: Updated to take advantage of the request unreference no longer 
 requiring the
 mutex lock.

 For: VIZ-5190
 Signed-off-by: John Harrison john.c.harri...@intel.com
 ---
 [snip]

 @@ -1382,6 +1387,10 @@ static void i915_gem_request_retire(struct 
 drm_i915_gem_request *request)
   list_del_init(request-list);
   i915_gem_request_remove_from_client(request);

 + /* In case the request is still in the signal pending list */
 + if (!list_empty(request-signal_list))
 + request-cancelled = true;
 +
 Another thing I did not see implemented is the sync_fence error state.

 This is more about the Android part, but related to this canceled flag so I
 am commenting here.

 I thought when TDR kicks in and we set request-cancelled to true, there
 should be a code path which somehow makes sync_fence-status negative.

 As it is, because fence_signal will not be called on canceled, I thought
 waiters will wait until timeout, rather than being woken up and return 
 error
 status.

 For this to work you would somehow need to make sync_fence-status go
 negative. With normal fence completion it goes from 1 - 0, via the
 completion callback. I did not immediately see how to make it go negative
 using the existing API.
 I think back when we did struct fence we decided that we won't care yet
 about forwarding error state since doing that across drivers if you have a
 chain of fences looked complicated. And no one had any clear idea about
 what kind of semantics we really want. If we want this we'd need to add
 it, but probably better to do that as a follow-up (usual caveat about
 open-source userspace and demonstration vehicles apply and all that).
 Hm, I am not sure but it feels to me not having an error state is a problem.
 Without it userspace can just keep waiting and waiting upon a fence even
 though the driver has discarded that workload and never plans to resubmit
 it. Am I missing something?
 fences still must complete eventually, they simply won't tell you whether
 the gpu died before completing the fence or not. Which is in line with gl
 spec for fences and arb_robustness - inquiring about gpu hangs is done
 through sideband apis.
Actually you can tell. Set fence-status before signalling.

~Maarten
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [RFC 7/9] drm/i915: Interrupt driven fences

2015-08-03 Thread Tvrtko Ursulin


On 07/27/2015 03:00 PM, Daniel Vetter wrote:

On Mon, Jul 27, 2015 at 02:20:43PM +0100, Tvrtko Ursulin wrote:


On 07/17/2015 03:31 PM, john.c.harri...@intel.com wrote:

From: John Harrison john.c.harri...@intel.com

The intended usage model for struct fence is that the signalled status should be
set on demand rather than polled. That is, there should not be a need for a
'signaled' function to be called every time the status is queried. Instead,
'something' should be done to enable a signal callback from the hardware which
will update the state directly. In the case of requests, this is the seqno
update interrupt. The idea is that this callback will only be enabled on demand
when something actually tries to wait on the fence.

This change removes the polling test and replaces it with the callback scheme.
Each fence is added to a 'please poke me' list at the start of
i915_add_request(). The interrupt handler then scans through the 'poke me' list
when a new seqno pops out and signals any matching fence/request. The fence is
then removed from the list so the entire request stack does not need to be
scanned every time. Note that the fence is added to the list before the commands
to generate the seqno interrupt are added to the ring. Thus the sequence is
guaranteed to be race free if the interrupt is already enabled.

Note that the interrupt is only enabled on demand (i.e. when __wait_request() is
called). Thus there is still a potential race when enabling the interrupt as the
request may already have completed. However, this is simply solved by calling
the interrupt processing code immediately after enabling the interrupt and
thereby checking for already completed requests.

Lastly, the ring clean up code has the possibility to cancel outstanding
requests (e.g. because TDR has reset the ring). These requests will never get
signalled and so must be removed from the signal list manually. This is done by
setting a 'cancelled' flag and then calling the regular notify/retire code path
rather than attempting to duplicate the list manipulation and clean up code in
multiple places. This also avoids any race condition where the cancellation
request might occur after/during the completion interrupt actually arriving.

v2: Updated to take advantage of the request unreference no longer requiring the
mutex lock.

For: VIZ-5190
Signed-off-by: John Harrison john.c.harri...@intel.com
---


[snip]


@@ -1382,6 +1387,10 @@ static void i915_gem_request_retire(struct 
drm_i915_gem_request *request)
list_del_init(request-list);
i915_gem_request_remove_from_client(request);

+   /* In case the request is still in the signal pending list */
+   if (!list_empty(request-signal_list))
+   request-cancelled = true;
+


Another thing I did not see implemented is the sync_fence error state.

This is more about the Android part, but related to this canceled flag so I
am commenting here.

I thought when TDR kicks in and we set request-cancelled to true, there
should be a code path which somehow makes sync_fence-status negative.

As it is, because fence_signal will not be called on canceled, I thought
waiters will wait until timeout, rather than being woken up and return error
status.

For this to work you would somehow need to make sync_fence-status go
negative. With normal fence completion it goes from 1 - 0, via the
completion callback. I did not immediately see how to make it go negative
using the existing API.


I think back when we did struct fence we decided that we won't care yet
about forwarding error state since doing that across drivers if you have a
chain of fences looked complicated. And no one had any clear idea about
what kind of semantics we really want. If we want this we'd need to add
it, but probably better to do that as a follow-up (usual caveat about
open-source userspace and demonstration vehicles apply and all that).


Hm, I am not sure but it feels to me not having an error state is a 
problem. Without it userspace can just keep waiting and waiting upon a 
fence even though the driver has discarded that workload and never plans 
to resubmit it. Am I missing something?


Tvrtko
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [RFC 7/9] drm/i915: Interrupt driven fences

2015-07-27 Thread Tvrtko Ursulin


Hi,

On 07/17/2015 03:31 PM, john.c.harri...@intel.com wrote:

From: John Harrison john.c.harri...@intel.com

The intended usage model for struct fence is that the signalled status should be
set on demand rather than polled. That is, there should not be a need for a
'signaled' function to be called every time the status is queried. Instead,
'something' should be done to enable a signal callback from the hardware which
will update the state directly. In the case of requests, this is the seqno
update interrupt. The idea is that this callback will only be enabled on demand
when something actually tries to wait on the fence.

This change removes the polling test and replaces it with the callback scheme.
Each fence is added to a 'please poke me' list at the start of
i915_add_request(). The interrupt handler then scans through the 'poke me' list
when a new seqno pops out and signals any matching fence/request. The fence is
then removed from the list so the entire request stack does not need to be
scanned every time. Note that the fence is added to the list before the commands
to generate the seqno interrupt are added to the ring. Thus the sequence is
guaranteed to be race free if the interrupt is already enabled.

Note that the interrupt is only enabled on demand (i.e. when __wait_request() is
called). Thus there is still a potential race when enabling the interrupt as the
request may already have completed. However, this is simply solved by calling
the interrupt processing code immediately after enabling the interrupt and
thereby checking for already completed requests.

Lastly, the ring clean up code has the possibility to cancel outstanding
requests (e.g. because TDR has reset the ring). These requests will never get
signalled and so must be removed from the signal list manually. This is done by
setting a 'cancelled' flag and then calling the regular notify/retire code path
rather than attempting to duplicate the list manipulation and clean up code in
multiple places. This also avoids any race condition where the cancellation
request might occur after/during the completion interrupt actually arriving.

v2: Updated to take advantage of the request unreference no longer requiring the
mutex lock.

For: VIZ-5190
Signed-off-by: John Harrison john.c.harri...@intel.com
---
  drivers/gpu/drm/i915/i915_drv.h |   8 ++
  drivers/gpu/drm/i915/i915_gem.c | 132 +---
  drivers/gpu/drm/i915/i915_irq.c |   2 +
  drivers/gpu/drm/i915/intel_lrc.c|   1 +
  drivers/gpu/drm/i915/intel_ringbuffer.c |   1 +
  drivers/gpu/drm/i915/intel_ringbuffer.h |   1 +
  6 files changed, 136 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 61c3db2..d7f1aa5 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2163,7 +2163,11 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
  struct drm_i915_gem_request {
/** Underlying object for implementing the signal/wait stuff. */
struct fence fence;
+   struct list_head signal_list;
+   struct list_head unsignal_list;


In addition to what Daniel said (one list_head looks enough) it is 
customary to call it _link.



struct list_head delay_free_list;
+   bool cancelled;
+   bool irq_enabled;

/** On Which ring this request was generated */
struct drm_i915_private *i915;
@@ -2241,6 +2245,10 @@ int i915_gem_request_alloc(struct intel_engine_cs *ring,
   struct drm_i915_gem_request **req_out);
  void i915_gem_request_cancel(struct drm_i915_gem_request *req);

+void i915_gem_request_submit(struct drm_i915_gem_request *req);
+void i915_gem_request_enable_interrupt(struct drm_i915_gem_request *req);
+void i915_gem_request_notify(struct intel_engine_cs *ring);
+
  int i915_create_fence_timeline(struct drm_device *dev,
   struct intel_context *ctx,
   struct intel_engine_cs *ring);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 482835a..7c589a9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1222,6 +1222,11 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
if (list_empty(req-list))
return 0;

+   /*
+* Enable interrupt completion of the request.
+*/
+   i915_gem_request_enable_interrupt(req);
+
if (i915_gem_request_completed(req))
return 0;

@@ -1382,6 +1387,10 @@ static void i915_gem_request_retire(struct 
drm_i915_gem_request *request)
list_del_init(request-list);
i915_gem_request_remove_from_client(request);

+   /* In case the request is still in the signal pending list */
+   if (!list_empty(request-signal_list))
+   request-cancelled = true;
+
i915_gem_request_unreference(request);
  }

@@ -2534,6 +2543,12 

Re: [Intel-gfx] [RFC 7/9] drm/i915: Interrupt driven fences

2015-07-27 Thread Daniel Vetter
On Mon, Jul 27, 2015 at 02:20:43PM +0100, Tvrtko Ursulin wrote:
 
 On 07/17/2015 03:31 PM, john.c.harri...@intel.com wrote:
 From: John Harrison john.c.harri...@intel.com
 
 The intended usage model for struct fence is that the signalled status 
 should be
 set on demand rather than polled. That is, there should not be a need for a
 'signaled' function to be called every time the status is queried. Instead,
 'something' should be done to enable a signal callback from the hardware 
 which
 will update the state directly. In the case of requests, this is the seqno
 update interrupt. The idea is that this callback will only be enabled on 
 demand
 when something actually tries to wait on the fence.
 
 This change removes the polling test and replaces it with the callback 
 scheme.
 Each fence is added to a 'please poke me' list at the start of
 i915_add_request(). The interrupt handler then scans through the 'poke me' 
 list
 when a new seqno pops out and signals any matching fence/request. The fence 
 is
 then removed from the list so the entire request stack does not need to be
 scanned every time. Note that the fence is added to the list before the 
 commands
 to generate the seqno interrupt are added to the ring. Thus the sequence is
 guaranteed to be race free if the interrupt is already enabled.
 
 Note that the interrupt is only enabled on demand (i.e. when 
 __wait_request() is
 called). Thus there is still a potential race when enabling the interrupt as 
 the
 request may already have completed. However, this is simply solved by calling
 the interrupt processing code immediately after enabling the interrupt and
 thereby checking for already completed requests.
 
 Lastly, the ring clean up code has the possibility to cancel outstanding
 requests (e.g. because TDR has reset the ring). These requests will never get
 signalled and so must be removed from the signal list manually. This is done 
 by
 setting a 'cancelled' flag and then calling the regular notify/retire code 
 path
 rather than attempting to duplicate the list manipulation and clean up 
 code in
 multiple places. This also avoids any race condition where the cancellation
 request might occur after/during the completion interrupt actually arriving.
 
 v2: Updated to take advantage of the request unreference no longer requiring 
 the
 mutex lock.
 
 For: VIZ-5190
 Signed-off-by: John Harrison john.c.harri...@intel.com
 ---
 
 [snip]
 
 @@ -1382,6 +1387,10 @@ static void i915_gem_request_retire(struct 
 drm_i915_gem_request *request)
  list_del_init(request-list);
  i915_gem_request_remove_from_client(request);
 
 +/* In case the request is still in the signal pending list */
 +if (!list_empty(request-signal_list))
 +request-cancelled = true;
 +
 
 Another thing I did not see implemented is the sync_fence error state.
 
 This is more about the Android part, but related to this canceled flag so I
 am commenting here.
 
 I thought when TDR kicks in and we set request-cancelled to true, there
 should be a code path which somehow makes sync_fence-status negative.
 
 As it is, because fence_signal will not be called on canceled, I thought
 waiters will wait until timeout, rather than being woken up and return error
 status.
 
 For this to work you would somehow need to make sync_fence-status go
 negative. With normal fence completion it goes from 1 - 0, via the
 completion callback. I did not immediately see how to make it go negative
 using the existing API.

I think back when we did struct fence we decided that we won't care yet
about forwarding error state since doing that across drivers if you have a
chain of fences looked complicated. And no one had any clear idea about
what kind of semantics we really want. If we want this we'd need to add
it, but probably better to do that as a follow-up (usual caveat about
open-source userspace and demonstration vehicles apply and all that).
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [RFC 7/9] drm/i915: Interrupt driven fences

2015-07-27 Thread Tvrtko Ursulin


On 07/17/2015 03:31 PM, john.c.harri...@intel.com wrote:

From: John Harrison john.c.harri...@intel.com

The intended usage model for struct fence is that the signalled status should be
set on demand rather than polled. That is, there should not be a need for a
'signaled' function to be called every time the status is queried. Instead,
'something' should be done to enable a signal callback from the hardware which
will update the state directly. In the case of requests, this is the seqno
update interrupt. The idea is that this callback will only be enabled on demand
when something actually tries to wait on the fence.

This change removes the polling test and replaces it with the callback scheme.
Each fence is added to a 'please poke me' list at the start of
i915_add_request(). The interrupt handler then scans through the 'poke me' list
when a new seqno pops out and signals any matching fence/request. The fence is
then removed from the list so the entire request stack does not need to be
scanned every time. Note that the fence is added to the list before the commands
to generate the seqno interrupt are added to the ring. Thus the sequence is
guaranteed to be race free if the interrupt is already enabled.

Note that the interrupt is only enabled on demand (i.e. when __wait_request() is
called). Thus there is still a potential race when enabling the interrupt as the
request may already have completed. However, this is simply solved by calling
the interrupt processing code immediately after enabling the interrupt and
thereby checking for already completed requests.

Lastly, the ring clean up code has the possibility to cancel outstanding
requests (e.g. because TDR has reset the ring). These requests will never get
signalled and so must be removed from the signal list manually. This is done by
setting a 'cancelled' flag and then calling the regular notify/retire code path
rather than attempting to duplicate the list manipulation and clean up code in
multiple places. This also avoids any race condition where the cancellation
request might occur after/during the completion interrupt actually arriving.

v2: Updated to take advantage of the request unreference no longer requiring the
mutex lock.

For: VIZ-5190
Signed-off-by: John Harrison john.c.harri...@intel.com
---


[snip]


@@ -1382,6 +1387,10 @@ static void i915_gem_request_retire(struct 
drm_i915_gem_request *request)
list_del_init(request->list);
i915_gem_request_remove_from_client(request);

+   /* In case the request is still in the signal pending list */
+   if (!list_empty(request->signal_list))
+   request->cancelled = true;
+


Another thing I did not see implemented is the sync_fence error state.

This is more about the Android part, but related to this canceled flag 
so I am commenting here.


I thought when TDR kicks in and we set request->cancelled to true, there 
should be a code path which somehow makes sync_fence->status negative.


As it is, because fence_signal will not be called on canceled, I thought 
waiters will wait until timeout, rather than being woken up and return 
error status.


For this to work you would somehow need to make sync_fence->status go 
negative. With normal fence completion it goes from 1 -> 0, via the 
completion callback. I did not immediately see how to make it go 
negative using the existing API.


Regards,

Tvrtko


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [RFC 7/9] drm/i915: Interrupt driven fences

2015-07-21 Thread Daniel Vetter
On Fri, Jul 17, 2015 at 03:31:21PM +0100, john.c.harri...@intel.com wrote:
 From: John Harrison john.c.harri...@intel.com
 
 The intended usage model for struct fence is that the signalled status should 
 be
 set on demand rather than polled. That is, there should not be a need for a
 'signaled' function to be called every time the status is queried. Instead,
 'something' should be done to enable a signal callback from the hardware which
 will update the state directly. In the case of requests, this is the seqno
 update interrupt. The idea is that this callback will only be enabled on 
 demand
 when something actually tries to wait on the fence.
 
 This change removes the polling test and replaces it with the callback scheme.
 Each fence is added to a 'please poke me' list at the start of
 i915_add_request(). The interrupt handler then scans through the 'poke me' 
 list
 when a new seqno pops out and signals any matching fence/request. The fence is
 then removed from the list so the entire request stack does not need to be
 scanned every time. Note that the fence is added to the list before the 
 commands
 to generate the seqno interrupt are added to the ring. Thus the sequence is
 guaranteed to be race free if the interrupt is already enabled.
 
 Note that the interrupt is only enabled on demand (i.e. when __wait_request() 
 is
 called). Thus there is still a potential race when enabling the interrupt as 
 the
 request may already have completed. However, this is simply solved by calling
 the interrupt processing code immediately after enabling the interrupt and
 thereby checking for already completed requests.
 
 Lastly, the ring clean up code has the possibility to cancel outstanding
 requests (e.g. because TDR has reset the ring). These requests will never get
 signalled and so must be removed from the signal list manually. This is done 
 by
 setting a 'cancelled' flag and then calling the regular notify/retire code 
 path
 rather than attempting to duplicate the list manipulation and clean up code 
 in
 multiple places. This also avoids any race condition where the cancellation
 request might occur after/during the completion interrupt actually arriving.
 
 v2: Updated to take advantage of the request unreference no longer requiring 
 the
 mutex lock.
 
 For: VIZ-5190
 Signed-off-by: John Harrison john.c.harri...@intel.com
 ---
  drivers/gpu/drm/i915/i915_drv.h |   8 ++
  drivers/gpu/drm/i915/i915_gem.c | 132 
 +---
  drivers/gpu/drm/i915/i915_irq.c |   2 +
  drivers/gpu/drm/i915/intel_lrc.c|   1 +
  drivers/gpu/drm/i915/intel_ringbuffer.c |   1 +
  drivers/gpu/drm/i915/intel_ringbuffer.h |   1 +
  6 files changed, 136 insertions(+), 9 deletions(-)
 
 diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
 index 61c3db2..d7f1aa5 100644
 --- a/drivers/gpu/drm/i915/i915_drv.h
 +++ b/drivers/gpu/drm/i915/i915_drv.h
 @@ -2163,7 +2163,11 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
  struct drm_i915_gem_request {
   /** Underlying object for implementing the signal/wait stuff. */
   struct fence fence;
 + struct list_head signal_list;
 + struct list_head unsignal_list;
   struct list_head delay_free_list;

From a very quick look a request can only ever be on one of these lists.
It would be clearer to just use one list and list_move to make that
reassignment and changing of ownership clearer.

 + bool cancelled;
 + bool irq_enabled;
  
   /** On Which ring this request was generated */
   struct drm_i915_private *i915;
 @@ -2241,6 +2245,10 @@ int i915_gem_request_alloc(struct intel_engine_cs 
 *ring,
  struct drm_i915_gem_request **req_out);
  void i915_gem_request_cancel(struct drm_i915_gem_request *req);
  
 +void i915_gem_request_submit(struct drm_i915_gem_request *req);
 +void i915_gem_request_enable_interrupt(struct drm_i915_gem_request *req);
 +void i915_gem_request_notify(struct intel_engine_cs *ring);
 +
  int i915_create_fence_timeline(struct drm_device *dev,
  struct intel_context *ctx,
  struct intel_engine_cs *ring);
 diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
 index 482835a..7c589a9 100644
 --- a/drivers/gpu/drm/i915/i915_gem.c
 +++ b/drivers/gpu/drm/i915/i915_gem.c
 @@ -1222,6 +1222,11 @@ int __i915_wait_request(struct drm_i915_gem_request 
 *req,
   if (list_empty(req->list))
   return 0;
  
 + /*
 +  * Enable interrupt completion of the request.
 +  */
 + i915_gem_request_enable_interrupt(req);
 +
   if (i915_gem_request_completed(req))
   return 0;
  
 @@ -1382,6 +1387,10 @@ static void i915_gem_request_retire(struct 
 drm_i915_gem_request *request)
   list_del_init(request->list);
   i915_gem_request_remove_from_client(request);
  
 + /* In case the request is still in the signal 

Re: [Intel-gfx] [RFC 7/9] drm/i915: Interrupt driven fences

2015-07-20 Thread Maarten Lankhorst
Op 17-07-15 om 16:31 schreef john.c.harri...@intel.com:
 From: John Harrison john.c.harri...@intel.com

 The intended usage model for struct fence is that the signalled status should 
 be
 set on demand rather than polled. That is, there should not be a need for a
 'signaled' function to be called every time the status is queried. Instead,
 'something' should be done to enable a signal callback from the hardware which
 will update the state directly. In the case of requests, this is the seqno
 update interrupt. The idea is that this callback will only be enabled on 
 demand
 when something actually tries to wait on the fence.

 This change removes the polling test and replaces it with the callback scheme.
 Each fence is added to a 'please poke me' list at the start of
 i915_add_request(). The interrupt handler then scans through the 'poke me' 
 list
 when a new seqno pops out and signals any matching fence/request. The fence is
 then removed from the list so the entire request stack does not need to be
 scanned every time. Note that the fence is added to the list before the 
 commands
 to generate the seqno interrupt are added to the ring. Thus the sequence is
 guaranteed to be race free if the interrupt is already enabled.

 Note that the interrupt is only enabled on demand (i.e. when __wait_request() 
 is
 called). Thus there is still a potential race when enabling the interrupt as 
 the
 request may already have completed. However, this is simply solved by calling
 the interrupt processing code immediately after enabling the interrupt and
 thereby checking for already completed requests.
This race will happen on any enable_signaling implementation, just something to 
be aware of.
 Lastly, the ring clean up code has the possibility to cancel outstanding
 requests (e.g. because TDR has reset the ring). These requests will never get
 signalled and so must be removed from the signal list manually. This is done 
 by
 setting a 'cancelled' flag and then calling the regular notify/retire code 
 path
 rather than attempting to duplicate the list manipulation and clean up code 
 in
 multiple places. This also avoids any race condition where the cancellation
 request might occur after/during the completion interrupt actually arriving.

I notice in this commit you only clean up requests when the refcount drops to 0.
What resources need to be kept after the fence is signaled? Can't you queue the
delayed work from the function that signals the fence and make sure it holds a
refcount?

I'm curious because if userspace can grab a reference for a fence it might lead
to not freeing resources for a long time..

~Maarten

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [RFC 7/9] drm/i915: Interrupt driven fences

2015-07-17 Thread John . C . Harrison
From: John Harrison john.c.harri...@intel.com

The intended usage model for struct fence is that the signalled status should be
set on demand rather than polled. That is, there should not be a need for a
'signaled' function to be called every time the status is queried. Instead,
'something' should be done to enable a signal callback from the hardware which
will update the state directly. In the case of requests, this is the seqno
update interrupt. The idea is that this callback will only be enabled on demand
when something actually tries to wait on the fence.

This change removes the polling test and replaces it with the callback scheme.
Each fence is added to a 'please poke me' list at the start of
i915_add_request(). The interrupt handler then scans through the 'poke me' list
when a new seqno pops out and signals any matching fence/request. The fence is
then removed from the list so the entire request stack does not need to be
scanned every time. Note that the fence is added to the list before the commands
to generate the seqno interrupt are added to the ring. Thus the sequence is
guaranteed to be race free if the interrupt is already enabled.

Note that the interrupt is only enabled on demand (i.e. when __wait_request() is
called). Thus there is still a potential race when enabling the interrupt as the
request may already have completed. However, this is simply solved by calling
the interrupt processing code immediately after enabling the interrupt and
thereby checking for already completed requests.

Lastly, the ring clean up code has the possibility to cancel outstanding
requests (e.g. because TDR has reset the ring). These requests will never get
signalled and so must be removed from the signal list manually. This is done by
setting a 'cancelled' flag and then calling the regular notify/retire code path
rather than attempting to duplicate the list manipulation and clean up code in
multiple places. This also avoids any race condition where the cancellation
request might occur after/during the completion interrupt actually arriving.

v2: Updated to take advantage of the request unreference no longer requiring the
mutex lock.

For: VIZ-5190
Signed-off-by: John Harrison john.c.harri...@intel.com
---
 drivers/gpu/drm/i915/i915_drv.h |   8 ++
 drivers/gpu/drm/i915/i915_gem.c | 132 +---
 drivers/gpu/drm/i915/i915_irq.c |   2 +
 drivers/gpu/drm/i915/intel_lrc.c|   1 +
 drivers/gpu/drm/i915/intel_ringbuffer.c |   1 +
 drivers/gpu/drm/i915/intel_ringbuffer.h |   1 +
 6 files changed, 136 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 61c3db2..d7f1aa5 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2163,7 +2163,11 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
 struct drm_i915_gem_request {
/** Underlying object for implementing the signal/wait stuff. */
struct fence fence;
+   struct list_head signal_list;
+   struct list_head unsignal_list;
struct list_head delay_free_list;
+   bool cancelled;
+   bool irq_enabled;
 
/** On Which ring this request was generated */
struct drm_i915_private *i915;
@@ -2241,6 +2245,10 @@ int i915_gem_request_alloc(struct intel_engine_cs *ring,
   struct drm_i915_gem_request **req_out);
 void i915_gem_request_cancel(struct drm_i915_gem_request *req);
 
+void i915_gem_request_submit(struct drm_i915_gem_request *req);
+void i915_gem_request_enable_interrupt(struct drm_i915_gem_request *req);
+void i915_gem_request_notify(struct intel_engine_cs *ring);
+
 int i915_create_fence_timeline(struct drm_device *dev,
   struct intel_context *ctx,
   struct intel_engine_cs *ring);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 482835a..7c589a9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1222,6 +1222,11 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
	if (list_empty(req->list))
return 0;
 
+   /*
+* Enable interrupt completion of the request.
+*/
+   i915_gem_request_enable_interrupt(req);
+
if (i915_gem_request_completed(req))
return 0;
 
@@ -1382,6 +1387,10 @@ static void i915_gem_request_retire(struct 
drm_i915_gem_request *request)
	list_del_init(request->list);
i915_gem_request_remove_from_client(request);
 
+   /* In case the request is still in the signal pending list */
+   if (!list_empty(request->signal_list))
+   request->cancelled = true;
+
i915_gem_request_unreference(request);
 }
 
@@ -2534,6 +2543,12 @@ void __i915_add_request(struct drm_i915_gem_request 
*request,
 */
	request->postfix = intel_ring_get_tail(ringbuf);
 
+   /*
+* Add the