Re: [Intel-gfx] [PATCH 1/2] drm/i915: Reorder await_execution before await_request

2020-05-27 Thread Tvrtko Ursulin



On 26/05/2020 10:07, Chris Wilson wrote:

Reorder the code so that we can reuse the await_execution from a special
case in await_request in the next patch.

Signed-off-by: Chris Wilson 
---
  drivers/gpu/drm/i915/i915_request.c | 264 ++--
  1 file changed, 132 insertions(+), 132 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c 
b/drivers/gpu/drm/i915/i915_request.c
index c282719ad3ac..33bbad623e02 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1053,37 +1053,91 @@ emit_semaphore_wait(struct i915_request *to,
 I915_FENCE_GFP);
  }
  
+static bool intel_timeline_sync_has_start(struct intel_timeline *tl,

+ struct dma_fence *fence)
+{
+   return __intel_timeline_sync_is_later(tl,
+ fence->context,
+ fence->seqno - 1);
+}
+
+static int intel_timeline_sync_set_start(struct intel_timeline *tl,
+const struct dma_fence *fence)
+{
+   return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1);
+}
+
  static int
-i915_request_await_request(struct i915_request *to, struct i915_request *from)
+__i915_request_await_execution(struct i915_request *to,
+  struct i915_request *from,
+  void (*hook)(struct i915_request *rq,
+   struct dma_fence *signal))
  {
-   int ret;
+   int err;
  
-	GEM_BUG_ON(to == from);

-   GEM_BUG_ON(to->timeline == from->timeline);
+   GEM_BUG_ON(intel_context_is_barrier(from->context));
  
-	if (i915_request_completed(from)) {

-   i915_sw_fence_set_error_once(>submit, from->fence.error);
+   /* Submit both requests at the same time */
+   err = __await_execution(to, from, hook, I915_FENCE_GFP);
+   if (err)
+   return err;
+
+   /* Squash repeated depenendices to the same timelines */
+   if (intel_timeline_sync_has_start(i915_request_timeline(to),
+ >fence))
return 0;
+
+   /*
+* Wait until the start of this request.
+*
+* The execution cb fires when we submit the request to HW. But in
+* many cases this may be long before the request itself is ready to
+* run (consider that we submit 2 requests for the same context, where
+* the request of interest is behind an indefinite spinner). So we hook
+* up to both to reduce our queues and keep the execution lag minimised
+* in the worst case, though we hope that the await_start is elided.
+*/
+   err = i915_request_await_start(to, from);
+   if (err < 0)
+   return err;
+
+   /*
+* Ensure both start together [after all semaphores in signal]
+*
+* Now that we are queued to the HW at roughly the same time (thanks
+* to the execute cb) and are ready to run at roughly the same time
+* (thanks to the await start), our signaler may still be indefinitely
+* delayed by waiting on a semaphore from a remote engine. If our
+* signaler depends on a semaphore, so indirectly do we, and we do not
+* want to start our payload until our signaler also starts theirs.
+* So we wait.
+*
+* However, there is also a second condition for which we need to wait
+* for the precise start of the signaler. Consider that the signaler
+* was submitted in a chain of requests following another context
+* (with just an ordinary intra-engine fence dependency between the
+* two). In this case the signaler is queued to HW, but not for
+* immediate execution, and so we must wait until it reaches the
+* active slot.
+*/
+   if (intel_engine_has_semaphores(to->engine) &&
+   !i915_request_has_initial_breadcrumb(to)) {
+   err = __emit_semaphore_wait(to, from, from->fence.seqno - 1);
+   if (err < 0)
+   return err;
}
  
+	/* Couple the dependency tree for PI on this exposed to->fence */

if (to->engine->schedule) {
-   ret = i915_sched_node_add_dependency(>sched,
+   err = i915_sched_node_add_dependency(>sched,
 >sched,
-I915_DEPENDENCY_EXTERNAL);
-   if (ret < 0)
-   return ret;
+I915_DEPENDENCY_WEAK);
+   if (err < 0)
+   return err;
}
  
-	if (to->engine == from->engine)

-   ret = i915_sw_fence_await_sw_fence_gfp(>submit,
-  >submit,
- 

[Intel-gfx] [PATCH 1/2] drm/i915: Reorder await_execution before await_request

2020-05-26 Thread Chris Wilson
Reorder the code so that we can reuse the await_execution from a special
case in await_request in the next patch.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_request.c | 264 ++--
 1 file changed, 132 insertions(+), 132 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c 
b/drivers/gpu/drm/i915/i915_request.c
index c282719ad3ac..33bbad623e02 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1053,37 +1053,91 @@ emit_semaphore_wait(struct i915_request *to,
 I915_FENCE_GFP);
 }
 
+static bool intel_timeline_sync_has_start(struct intel_timeline *tl,
+ struct dma_fence *fence)
+{
+   return __intel_timeline_sync_is_later(tl,
+ fence->context,
+ fence->seqno - 1);
+}
+
+static int intel_timeline_sync_set_start(struct intel_timeline *tl,
+const struct dma_fence *fence)
+{
+   return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1);
+}
+
 static int
-i915_request_await_request(struct i915_request *to, struct i915_request *from)
+__i915_request_await_execution(struct i915_request *to,
+  struct i915_request *from,
+  void (*hook)(struct i915_request *rq,
+   struct dma_fence *signal))
 {
-   int ret;
+   int err;
 
-   GEM_BUG_ON(to == from);
-   GEM_BUG_ON(to->timeline == from->timeline);
+   GEM_BUG_ON(intel_context_is_barrier(from->context));
 
-   if (i915_request_completed(from)) {
-   i915_sw_fence_set_error_once(>submit, from->fence.error);
+   /* Submit both requests at the same time */
+   err = __await_execution(to, from, hook, I915_FENCE_GFP);
+   if (err)
+   return err;
+
+   /* Squash repeated depenendices to the same timelines */
+   if (intel_timeline_sync_has_start(i915_request_timeline(to),
+ >fence))
return 0;
+
+   /*
+* Wait until the start of this request.
+*
+* The execution cb fires when we submit the request to HW. But in
+* many cases this may be long before the request itself is ready to
+* run (consider that we submit 2 requests for the same context, where
+* the request of interest is behind an indefinite spinner). So we hook
+* up to both to reduce our queues and keep the execution lag minimised
+* in the worst case, though we hope that the await_start is elided.
+*/
+   err = i915_request_await_start(to, from);
+   if (err < 0)
+   return err;
+
+   /*
+* Ensure both start together [after all semaphores in signal]
+*
+* Now that we are queued to the HW at roughly the same time (thanks
+* to the execute cb) and are ready to run at roughly the same time
+* (thanks to the await start), our signaler may still be indefinitely
+* delayed by waiting on a semaphore from a remote engine. If our
+* signaler depends on a semaphore, so indirectly do we, and we do not
+* want to start our payload until our signaler also starts theirs.
+* So we wait.
+*
+* However, there is also a second condition for which we need to wait
+* for the precise start of the signaler. Consider that the signaler
+* was submitted in a chain of requests following another context
+* (with just an ordinary intra-engine fence dependency between the
+* two). In this case the signaler is queued to HW, but not for
+* immediate execution, and so we must wait until it reaches the
+* active slot.
+*/
+   if (intel_engine_has_semaphores(to->engine) &&
+   !i915_request_has_initial_breadcrumb(to)) {
+   err = __emit_semaphore_wait(to, from, from->fence.seqno - 1);
+   if (err < 0)
+   return err;
}
 
+   /* Couple the dependency tree for PI on this exposed to->fence */
if (to->engine->schedule) {
-   ret = i915_sched_node_add_dependency(>sched,
+   err = i915_sched_node_add_dependency(>sched,
 >sched,
-I915_DEPENDENCY_EXTERNAL);
-   if (ret < 0)
-   return ret;
+I915_DEPENDENCY_WEAK);
+   if (err < 0)
+   return err;
}
 
-   if (to->engine == from->engine)
-   ret = i915_sw_fence_await_sw_fence_gfp(>submit,
-  >submit,
-