The handling of the no-preemption priority level imposes the restriction
that we need to maintain the implied ordering even though preemption is
disabled. Otherwise we may end up with an AB-BA deadlock across multiple
engines due to a real preemption event reordering the no-preemption
WAITs. To resolve this issue we currently promote all requests to WAIT
on unsubmission; however, this interferes with the timeslicing
requirement that we do not apply any implicit promotion that will defeat
the round-robin timeslice list. (If we automatically promote the active
request it will go back to the head of the queue and not the tail!)

So we need implicit promotion to prevent reordering around semaphores
where we are not allowed to preempt, and we must avoid implicit
promotion on unsubmission. So if, instead of at unsubmit, we apply that
implicit promotion when adding the dependency, we avoid the semaphore
deadlock and we also reduce the gains made by the promotion for user
space waiting. Furthermore, by keeping the earlier dependencies at a
higher level, we reduce the search space for timeslicing without
altering runtime scheduling too badly (no dependencies at all will be
assigned a higher priority for rrul).

v2: Limit the bump to external edges (as originally intended) i.e.
between contexts and out to the user.

Testcase: igt/gem_concurrent_blit
Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/selftest_lrc.c      | 12 ++++++++----
 drivers/gpu/drm/i915/i915_request.c         | 13 +++----------
 drivers/gpu/drm/i915/i915_scheduler.c       | 15 +++++++++++++--
 drivers/gpu/drm/i915/i915_scheduler.h       |  3 ++-
 drivers/gpu/drm/i915/i915_scheduler_types.h |  3 ++-
 5 files changed, 28 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 4b042893dc0e..5b3d8e33f1cf 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -98,12 +98,14 @@ static int live_busywait_preempt(void *arg)
        ctx_hi = kernel_context(i915);
        if (!ctx_hi)
                goto err_unlock;
-       ctx_hi->sched.priority = INT_MAX;
+       ctx_hi->sched.priority =
+               I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
 
        ctx_lo = kernel_context(i915);
        if (!ctx_lo)
                goto err_ctx_hi;
-       ctx_lo->sched.priority = INT_MIN;
+       ctx_lo->sched.priority =
+               I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
 
        obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
        if (IS_ERR(obj)) {
@@ -958,12 +960,14 @@ static int live_preempt_hang(void *arg)
        ctx_hi = kernel_context(i915);
        if (!ctx_hi)
                goto err_spin_lo;
-       ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
+       ctx_hi->sched.priority =
+               I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
 
        ctx_lo = kernel_context(i915);
        if (!ctx_lo)
                goto err_ctx_hi;
-       ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
+       ctx_lo->sched.priority =
+               I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
 
        for_each_engine(engine, i915, id) {
                struct i915_request *rq;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 8cb3ed5531e3..1a04894a904b 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -468,15 +468,6 @@ void __i915_request_unsubmit(struct i915_request *request)
        /* We may be recursing from the signal callback of another i915 fence */
        spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
 
-       /*
-        * As we do not allow WAIT to preempt inflight requests,
-        * once we have executed a request, along with triggering
-        * any execution callbacks, we must preserve its ordering
-        * within the non-preemptible FIFO.
-        */
-       BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK); /* only internal */
-       request->sched.attr.priority |= __NO_PREEMPTION;
-
        if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
                i915_request_cancel_breadcrumb(request);
 
@@ -861,7 +852,9 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from)
                return 0;
 
        if (to->engine->schedule) {
-               ret = i915_sched_node_add_dependency(&to->sched, &from->sched);
+               ret = i915_sched_node_add_dependency(&to->sched,
+                                                    &from->sched,
+                                                    I915_DEPENDENCY_EXTERNAL);
                if (ret < 0)
                        return ret;
        }
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 05eb50558aba..319eb8703451 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -388,6 +388,16 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
                    !node_started(signal))
                        node->flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN;
 
+               /*
+                * As we do not allow WAIT to preempt inflight requests,
+                * once we have executed a request, along with triggering
+                * any execution callbacks, we must preserve its ordering
+                * within the non-preemptible FIFO.
+                */
+               BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK);
+               if (flags & I915_DEPENDENCY_EXTERNAL)
+                       __bump_priority(signal, __NO_PREEMPTION);
+
                ret = true;
        }
 
@@ -397,7 +407,8 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
 }
 
 int i915_sched_node_add_dependency(struct i915_sched_node *node,
-                                  struct i915_sched_node *signal)
+                                  struct i915_sched_node *signal,
+                                  unsigned long flags)
 {
        struct i915_dependency *dep;
 
@@ -406,7 +417,7 @@ int i915_sched_node_add_dependency(struct i915_sched_node *node,
                return -ENOMEM;
 
        if (!__i915_sched_node_add_dependency(node, signal, dep,
-                                             I915_DEPENDENCY_ALLOC))
+                                             flags | I915_DEPENDENCY_ALLOC))
                i915_dependency_free(dep);
 
        return 0;
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 7eefccff39bf..fc7b6baa1355 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -33,7 +33,8 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
                                      unsigned long flags);
 
 int i915_sched_node_add_dependency(struct i915_sched_node *node,
-                                  struct i915_sched_node *signal);
+                                  struct i915_sched_node *signal,
+                                  unsigned long flags);
 
 void i915_sched_node_fini(struct i915_sched_node *node);
 
diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
index 166a457884b2..3e309631bd0b 100644
--- a/drivers/gpu/drm/i915/i915_scheduler_types.h
+++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
@@ -66,7 +66,8 @@ struct i915_dependency {
        struct list_head wait_link;
        struct list_head dfs_link;
        unsigned long flags;
-#define I915_DEPENDENCY_ALLOC BIT(0)
+#define I915_DEPENDENCY_ALLOC          BIT(0)
+#define I915_DEPENDENCY_EXTERNAL       BIT(1)
 };
 
 #endif /* _I915_SCHEDULER_TYPES_H_ */
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to