Re: [Intel-gfx] [PATCH 1/2] drm/i915: Reorder await_execution before await_request
On 26/05/2020 10:07, Chris Wilson wrote: Reorder the code so that we can reuse the await_execution from a special case in await_request in the next patch. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> --- drivers/gpu/drm/i915/i915_request.c | 264 ++-- 1 file changed, 132 insertions(+), 132 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index c282719ad3ac..33bbad623e02 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1053,37 +1053,91 @@ emit_semaphore_wait(struct i915_request *to, I915_FENCE_GFP); } +static bool intel_timeline_sync_has_start(struct intel_timeline *tl, + struct dma_fence *fence) +{ + return __intel_timeline_sync_is_later(tl, + fence->context, + fence->seqno - 1); +} + +static int intel_timeline_sync_set_start(struct intel_timeline *tl, +const struct dma_fence *fence) +{ + return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1); +} + static int -i915_request_await_request(struct i915_request *to, struct i915_request *from) +__i915_request_await_execution(struct i915_request *to, + struct i915_request *from, + void (*hook)(struct i915_request *rq, + struct dma_fence *signal)) { - int ret; + int err; - GEM_BUG_ON(to == from); - GEM_BUG_ON(to->timeline == from->timeline); + GEM_BUG_ON(intel_context_is_barrier(from->context)); - if (i915_request_completed(from)) { - i915_sw_fence_set_error_once(&to->submit, from->fence.error); + /* Submit both requests at the same time */ + err = __await_execution(to, from, hook, I915_FENCE_GFP); + if (err) + return err; + + /* Squash repeated depenendices to the same timelines */ + if (intel_timeline_sync_has_start(i915_request_timeline(to), + &from->fence)) return 0; + + /* +* Wait until the start of this request. +* +* The execution cb fires when we submit the request to HW. 
But in +* many cases this may be long before the request itself is ready to +* run (consider that we submit 2 requests for the same context, where +* the request of interest is behind an indefinite spinner). So we hook +* up to both to reduce our queues and keep the execution lag minimised +* in the worst case, though we hope that the await_start is elided. +*/ + err = i915_request_await_start(to, from); + if (err < 0) + return err; + + /* +* Ensure both start together [after all semaphores in signal] +* +* Now that we are queued to the HW at roughly the same time (thanks +* to the execute cb) and are ready to run at roughly the same time +* (thanks to the await start), our signaler may still be indefinitely +* delayed by waiting on a semaphore from a remote engine. If our +* signaler depends on a semaphore, so indirectly do we, and we do not +* want to start our payload until our signaler also starts theirs. +* So we wait. +* +* However, there is also a second condition for which we need to wait +* for the precise start of the signaler. Consider that the signaler +* was submitted in a chain of requests following another context +* (with just an ordinary intra-engine fence dependency between the +* two). In this case the signaler is queued to HW, but not for +* immediate execution, and so we must wait until it reaches the +* active slot. +*/ + if (intel_engine_has_semaphores(to->engine) && + !i915_request_has_initial_breadcrumb(to)) { + err = __emit_semaphore_wait(to, from, from->fence.seqno - 1); + if (err < 0) + return err; } + /* Couple the dependency tree for PI on this exposed to->fence */ if (to->engine->schedule) { - ret = i915_sched_node_add_dependency(&to->sched, + err = i915_sched_node_add_dependency(&to->sched, &from->sched, -I915_DEPENDENCY_EXTERNAL); - if (ret < 0) - return ret; +I915_DEPENDENCY_WEAK); + if (err < 0) + return err; } - if (to->engine == from->engine) - ret = i915_sw_fence_await_sw_fence_gfp(&to->submit, - &from->submit, -
[Intel-gfx] [PATCH 1/2] drm/i915: Reorder await_execution before await_request
Reorder the code so that we can reuse the await_execution from a special case in await_request in the next patch. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> --- drivers/gpu/drm/i915/i915_request.c | 264 ++-- 1 file changed, 132 insertions(+), 132 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index c282719ad3ac..33bbad623e02 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1053,37 +1053,91 @@ emit_semaphore_wait(struct i915_request *to, I915_FENCE_GFP); } +static bool intel_timeline_sync_has_start(struct intel_timeline *tl, + struct dma_fence *fence) +{ + return __intel_timeline_sync_is_later(tl, + fence->context, + fence->seqno - 1); +} + +static int intel_timeline_sync_set_start(struct intel_timeline *tl, +const struct dma_fence *fence) +{ + return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1); +} + static int -i915_request_await_request(struct i915_request *to, struct i915_request *from) +__i915_request_await_execution(struct i915_request *to, + struct i915_request *from, + void (*hook)(struct i915_request *rq, + struct dma_fence *signal)) { - int ret; + int err; - GEM_BUG_ON(to == from); - GEM_BUG_ON(to->timeline == from->timeline); + GEM_BUG_ON(intel_context_is_barrier(from->context)); - if (i915_request_completed(from)) { - i915_sw_fence_set_error_once(&to->submit, from->fence.error); + /* Submit both requests at the same time */ + err = __await_execution(to, from, hook, I915_FENCE_GFP); + if (err) + return err; + + /* Squash repeated depenendices to the same timelines */ + if (intel_timeline_sync_has_start(i915_request_timeline(to), + &from->fence)) return 0; + + /* +* Wait until the start of this request. +* +* The execution cb fires when we submit the request to HW. 
But in +* many cases this may be long before the request itself is ready to +* run (consider that we submit 2 requests for the same context, where +* the request of interest is behind an indefinite spinner). So we hook +* up to both to reduce our queues and keep the execution lag minimised +* in the worst case, though we hope that the await_start is elided. +*/ + err = i915_request_await_start(to, from); + if (err < 0) + return err; + + /* +* Ensure both start together [after all semaphores in signal] +* +* Now that we are queued to the HW at roughly the same time (thanks +* to the execute cb) and are ready to run at roughly the same time +* (thanks to the await start), our signaler may still be indefinitely +* delayed by waiting on a semaphore from a remote engine. If our +* signaler depends on a semaphore, so indirectly do we, and we do not +* want to start our payload until our signaler also starts theirs. +* So we wait. +* +* However, there is also a second condition for which we need to wait +* for the precise start of the signaler. Consider that the signaler +* was submitted in a chain of requests following another context +* (with just an ordinary intra-engine fence dependency between the +* two). In this case the signaler is queued to HW, but not for +* immediate execution, and so we must wait until it reaches the +* active slot. +*/ + if (intel_engine_has_semaphores(to->engine) && + !i915_request_has_initial_breadcrumb(to)) { + err = __emit_semaphore_wait(to, from, from->fence.seqno - 1); + if (err < 0) + return err; } + /* Couple the dependency tree for PI on this exposed to->fence */ if (to->engine->schedule) { - ret = i915_sched_node_add_dependency(&to->sched, + err = i915_sched_node_add_dependency(&to->sched, &from->sched, -I915_DEPENDENCY_EXTERNAL); - if (ret < 0) - return ret; +I915_DEPENDENCY_WEAK); + if (err < 0) + return err; } - if (to->engine == from->engine) - ret = i915_sw_fence_await_sw_fence_gfp(&to->submit, - &from->submit, -