The priority-inheritance walk over the request dependency graph is a
topological sort, so we expect it to run in time linear in the size of
the graph, O(V+E). However, in removing the recursion, the walk is no
longer a depth-first search but a breadth-first one: without the DFS
post-order, a node may be revisited once per edge that reaches it
rather than once in total, and the sort degrades to O(VE). Let's
demonstrate how bad this is with a few examples, and build a few test
cases to verify a potential fix.
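
To make the asymptotics concrete, here is a standalone userspace sketch
(hypothetical struct and a single-dependency chain, purely illustrative;
this is not the driver's i915_sched_node walk) contrasting a DFS emit
with an order-less sweep:

  /*
   * Illustration only (hypothetical struct, not the driver's
   * i915_sched_node): topologically sort a dependency chain two ways.
   * The DFS emits each node after its dependency, touching every node
   * and edge once: O(V+E). The order-less sweep must rescan the whole
   * list until it stops making progress; on a chain it emits a single
   * node per sweep: O(VE).
   */
  #include <stdbool.h>
  #include <stdio.h>

  #define NNODES 1024

  static struct node {
          int dep;        /* index of our one dependency, or -1 */
          bool emitted;
  } g[NNODES];

  static unsigned long touches;

  static void dfs_emit(int i)     /* O(V+E) */
  {
          touches++;
          if (g[i].emitted)
                  return;
          if (g[i].dep >= 0)
                  dfs_emit(g[i].dep);
          g[i].emitted = true;
  }

  static void sweep_emit(void)    /* O(VE) */
  {
          bool progress = true;
          int i;

          while (progress) {
                  progress = false;
                  for (i = 0; i < NNODES; i++) {
                          touches++;
                          if (g[i].emitted)
                                  continue;
                          if (g[i].dep >= 0 && !g[g[i].dep].emitted)
                                  continue;
                          g[i].emitted = true;
                          progress = true;
                  }
          }
  }

  int main(void)
  {
          int i;

          for (i = 0; i < NNODES; i++)    /* node i awaits node i + 1 */
                  g[i].dep = i < NNODES - 1 ? i + 1 : -1;

          sweep_emit();
          printf("sweep: %lu touches\n", touches);  /* ~NNODES^2 */

          touches = 0;
          for (i = 0; i < NNODES; i++)
                  g[i].emitted = false;
          for (i = 0; i < NNODES; i++)
                  dfs_emit(i);
          printf("dfs:   %lu touches\n", touches);  /* ~2 * NNODES */

          return 0;
  }

On a 1024-node chain the sweep makes ~2^20 touches against ~2^11 for
the DFS. The single/wide/inv chains below rebuild the same graph shapes
out of real requests.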

Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
---
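Testing note: with CONFIG_DRM_I915_SELFTEST=y, the new subtests should
be reachable via the usual selftest module parameters, e.g.
i915.live_selftests=1 and i915.perf_selftests=1 (assuming the standard
selftest harness; results land in dmesg). The perf variants report the
set-priority latency for each graph size.
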
 drivers/gpu/drm/i915/i915_scheduler.c         |   4 +
 .../drm/i915/selftests/i915_live_selftests.h  |   1 +
 .../drm/i915/selftests/i915_perf_selftests.h  |   1 +
 .../gpu/drm/i915/selftests/i915_scheduler.c   | 530 ++++++++++++++++++
 4 files changed, 536 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/selftests/i915_scheduler.c

diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 45e000c257db..3c1a0b001746 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -565,6 +565,10 @@ void i915_sched_node_retire(struct i915_sched_node *node)
        spin_unlock_irq(&node->lock);
 }
 
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_scheduler.c"
+#endif
+
 static void i915_global_scheduler_shrink(void)
 {
        kmem_cache_shrink(global.slab_dependencies);
diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
index a92c0e9b7e6b..2200a5baa68e 100644
--- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
@@ -26,6 +26,7 @@ selftest(gt_mocs, intel_mocs_live_selftests)
 selftest(gt_pm, intel_gt_pm_live_selftests)
 selftest(gt_heartbeat, intel_heartbeat_live_selftests)
 selftest(requests, i915_request_live_selftests)
+selftest(scheduler, i915_scheduler_live_selftests)
 selftest(active, i915_active_live_selftests)
 selftest(objects, i915_gem_object_live_selftests)
 selftest(mman, i915_gem_mman_live_selftests)
diff --git a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
index c2389f8a257d..137e35283fee 100644
--- a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h
@@ -17,5 +17,6 @@
  */
 selftest(engine_cs, intel_engine_cs_perf_selftests)
 selftest(request, i915_request_perf_selftests)
+selftest(scheduler, i915_scheduler_perf_selftests)
 selftest(blt, i915_gem_object_blt_perf_selftests)
 selftest(region, intel_memory_region_perf_selftests)
diff --git a/drivers/gpu/drm/i915/selftests/i915_scheduler.c b/drivers/gpu/drm/i915/selftests/i915_scheduler.c
new file mode 100644
index 000000000000..98d29fa8f5f8
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/i915_scheduler.c
@@ -0,0 +1,530 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include "i915_selftest.h"
+
+#include "gt/intel_context.h"
+#include "gt/selftest_engine_heartbeat.h"
+#include "selftests/igt_spinner.h"
+
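+/*
+ * Keep the engine quiet: no heartbeats, no preemption timeout and no
+ * timeslicing, so the requests we queue up behind the spinner stay
+ * exactly where we put them.
+ */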
+static void scheduling_disable(struct intel_engine_cs *engine)
+{
+       engine->props.preempt_timeout_ms = 0;
+       engine->props.timeslice_duration_ms = 0;
+
+       st_engine_heartbeat_disable(engine);
+}
+
+static void scheduling_enable(struct intel_engine_cs *engine)
+{
+       st_engine_heartbeat_enable(engine);
+
+       engine->props.preempt_timeout_ms =
+               engine->defaults.preempt_timeout_ms;
+       engine->props.timeslice_duration_ms =
+               engine->defaults.timeslice_duration_ms;
+}
+
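+/*
+ * Apply the chain constructor to the first engine with a scheduler
+ * (enough for the perf measurements), or to every such engine (for
+ * the live correctness checks).
+ */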
+static int first_engine(struct drm_i915_private *i915,
+                       int (*chain)(struct intel_engine_cs *engine,
+                                    unsigned long param,
+                                    void (*fn)(struct i915_request *rq,
+                                               unsigned long v,
+                                               unsigned long e)),
+                       unsigned long param,
+                       void (*fn)(struct i915_request *rq,
+                                  unsigned long v, unsigned long e))
+{
+       struct intel_engine_cs *engine;
+
+       for_each_uabi_engine(engine, i915) {
+               if (!intel_engine_has_scheduler(engine))
+                       continue;
+
+               return chain(engine, param, fn);
+       }
+
+       return 0;
+}
+
+static int all_engines(struct drm_i915_private *i915,
+                      int (*chain)(struct intel_engine_cs *engine,
+                                   unsigned long param,
+                                   void (*fn)(struct i915_request *rq,
+                                              unsigned long v,
+                                              unsigned long e)),
+                      unsigned long param,
+                      void (*fn)(struct i915_request *rq,
+                                 unsigned long v, unsigned long e))
+{
+       struct intel_engine_cs *engine;
+       int err;
+
+       for_each_uabi_engine(engine, i915) {
+               if (!intel_engine_has_scheduler(engine))
+                       continue;
+
+               err = chain(engine, param, fn);
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
+
+static bool check_context_order(struct intel_engine_cs *engine)
+{
+       u64 last_seqno, last_context;
+       unsigned long count;
+       bool result = false;
+       struct rb_node *rb;
+       int last_prio;
+
+       /* We expect the execution order to follow ascending fence-context */
+       spin_lock_irq(&engine->active.lock);
+
+       count = 0;
+       last_context = 0;
+       last_seqno = 0;
+       last_prio = 0;
+       for (rb = rb_first_cached(&engine->execlists.queue); rb; rb = rb_next(rb)) {
+               struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
+               struct i915_request *rq;
+
+               priolist_for_each_request(rq, p) {
+                       if (rq->fence.context < last_context ||
+                           (rq->fence.context == last_context &&
+                            rq->fence.seqno < last_seqno)) {
+                               pr_err("[%lu] %llx:%lld [prio:%d] after %llx:%lld [prio:%d]\n",
+                                      count,
+                                      rq->fence.context,
+                                      rq->fence.seqno,
+                                      rq_prio(rq),
+                                      last_context,
+                                      last_seqno,
+                                      last_prio);
+                               goto out_unlock;
+                       }
+
+                       last_context = rq->fence.context;
+                       last_seqno = rq->fence.seqno;
+                       last_prio = rq_prio(rq);
+                       count++;
+               }
+       }
+       result = true;
+out_unlock:
+       spin_unlock_irq(&engine->active.lock);
+
+       return result;
+}
+
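+/*
+ * A deep, narrow graph: up to @length requests on a single context,
+ * all queued behind a spinner. fn() is applied to the tail, and then
+ * we check that the execution order of the queue is still sane.
+ */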
+static int __single_chain(struct intel_engine_cs *engine, unsigned long length,
+                         void (*fn)(struct i915_request *rq,
+                                    unsigned long v, unsigned long e))
+{
+       struct intel_context *ce;
+       struct igt_spinner spin;
+       struct i915_request *rq;
+       unsigned long count;
+       unsigned long min;
+       int err = 0;
+
+       scheduling_disable(engine);
+
+       if (igt_spinner_init(&spin, engine->gt)) {
+               err = -ENOMEM;
+               goto err_heartbeat;
+       }
+
+       ce = intel_context_create(engine);
+       if (IS_ERR(ce)) {
+               err = PTR_ERR(ce);
+               goto err_spin;
+       }
+       ce->ring = __intel_context_ring_size(SZ_512K);
+
+       rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
+       if (IS_ERR(rq)) {
+               err = PTR_ERR(rq);
+               goto err_context;
+       }
+       i915_request_add(rq);
+       min = ce->ring->size - ce->ring->space;
+
+       count = 1;
+       while (count < length && ce->ring->space > min) {
+               rq = intel_context_create_request(ce);
+               if (IS_ERR(rq)) {
+                       err = PTR_ERR(rq);
+                       break;
+               }
+               i915_request_add(rq);
+               count++;
+       }
+
+       if (!err) { /* don't apply fn() to an ERR_PTR from a failed add */
+               tasklet_disable(&engine->execlists.tasklet);
+               local_bh_disable();
+               fn(rq, count, count - 1);
+               if (!check_context_order(engine))
+                       err = -EINVAL;
+               local_bh_enable();
+               tasklet_enable(&engine->execlists.tasklet);
+       }
+
+       igt_spinner_end(&spin);
+err_context:
+       intel_context_put(ce);
+err_spin:
+       igt_spinner_fini(&spin);
+err_heartbeat:
+       scheduling_enable(engine);
+       return err;
+}
+
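+/*
+ * A dense graph: @width requests on separate contexts, each awaiting
+ * every request submitted before it, giving O(width^2) fence edges.
+ */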
+static int __wide_chain(struct intel_engine_cs *engine, unsigned long width,
+                       void (*fn)(struct i915_request *rq,
+                                  unsigned long v, unsigned long e))
+{
+       struct intel_context **ce;
+       struct i915_request **rq;
+       struct igt_spinner spin;
+       unsigned long count;
+       unsigned long i, j;
+       int err = 0;
+
+       scheduling_disable(engine);
+
+       if (igt_spinner_init(&spin, engine->gt)) {
+               err = -ENOMEM;
+               goto err_heartbeat;
+       }
+
+       ce = kmalloc_array(width, sizeof(*ce), GFP_KERNEL);
+       if (!ce) {
+               err = -ENOMEM;
+               goto err_spin;
+       }
+
+       for (i = 0; i < width; i++) {
+               ce[i] = intel_context_create(engine);
+               if (IS_ERR(ce[i])) {
+                       err = PTR_ERR(ce[i]);
+                       width = i;
+                       goto err_context;
+               }
+       }
+
+       rq = kmalloc_array(width, sizeof(*rq), GFP_KERNEL);
+       if (!rq) {
+               err = -ENOMEM;
+               goto err_context;
+       }
+
+       rq[0] = igt_spinner_create_request(&spin, ce[0], MI_NOOP);
+       if (IS_ERR(rq[0])) {
+               err = PTR_ERR(rq[0]);
+               goto err_free;
+       }
+       i915_request_add(rq[0]);
+
+       count = 0;
+       for (i = 1; i < width; i++) {
+               GEM_BUG_ON(i915_request_completed(rq[0]));
+
+               rq[i] = intel_context_create_request(ce[i]);
+               if (IS_ERR(rq[i])) {
+                       err = PTR_ERR(rq[i]);
+                       break;
+               }
+               for (j = 0; j < i; j++) {
+                       err = i915_request_await_dma_fence(rq[i],
+                                                          &rq[j]->fence);
+                       if (err)
+                               break;
+                       count++;
+               }
+               i915_request_add(rq[i]);
+       }
+
+       tasklet_disable(&engine->execlists.tasklet);
+       local_bh_disable();
+       fn(rq[i - 1], i, count);
+       if (!check_context_order(engine))
+               err = -EINVAL;
+       local_bh_enable();
+       tasklet_enable(&engine->execlists.tasklet);
+
+       igt_spinner_end(&spin);
+err_free:
+       kfree(rq);
+err_context:
+       for (i = 0; i < width; i++)
+               intel_context_put(ce[i]);
+       kfree(ce);
+err_spin:
+       igt_spinner_fini(&spin);
+err_heartbeat:
+       scheduling_enable(engine);
+       return err;
+}
+
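+/*
+ * As __wide_chain(), but the awaits are accumulated in reverse order,
+ * so each dependency list is built backwards with respect to the
+ * submission order the sort must recover.
+ */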
+static int __inv_chain(struct intel_engine_cs *engine, unsigned long width,
+                      void (*fn)(struct i915_request *rq,
+                                 unsigned long v, unsigned long e))
+{
+       struct intel_context **ce;
+       struct i915_request **rq;
+       struct igt_spinner spin;
+       unsigned long count;
+       unsigned long i, j;
+       int err = 0;
+
+       scheduling_disable(engine);
+
+       if (igt_spinner_init(&spin, engine->gt)) {
+               err = -ENOMEM;
+               goto err_heartbeat;
+       }
+
+       ce = kmalloc_array(width, sizeof(*ce), GFP_KERNEL);
+       if (!ce) {
+               err = -ENOMEM;
+               goto err_spin;
+       }
+
+       for (i = 0; i < width; i++) {
+               ce[i] = intel_context_create(engine);
+               if (IS_ERR(ce[i])) {
+                       err = PTR_ERR(ce[i]);
+                       width = i;
+                       goto err_context;
+               }
+       }
+
+       rq = kmalloc_array(width, sizeof(*rq), GFP_KERNEL);
+       if (!rq) {
+               err = -ENOMEM;
+               goto err_context;
+       }
+
+       rq[0] = igt_spinner_create_request(&spin, ce[0], MI_NOOP);
+       if (IS_ERR(rq[0])) {
+               err = PTR_ERR(rq[0]);
+               goto err_free;
+       }
+       i915_request_add(rq[0]);
+
+       count = 0;
+       for (i = 1; i < width; i++) {
+               GEM_BUG_ON(i915_request_completed(rq[0]));
+
+               rq[i] = intel_context_create_request(ce[i]);
+               if (IS_ERR(rq[i])) {
+                       err = PTR_ERR(rq[i]);
+                       break;
+               }
+               for (j = i; j > 0; j--) {
+                       err = i915_request_await_dma_fence(rq[i],
+                                                          &rq[j - 1]->fence);
+                       if (err)
+                               break;
+                       count++;
+               }
+               i915_request_add(rq[i]);
+       }
+
+       tasklet_disable(&engine->execlists.tasklet);
+       local_bh_disable();
+       fn(rq[i - 1], i, count);
+       if (!check_context_order(engine))
+               err = -EINVAL;
+       local_bh_enable();
+       tasklet_enable(&engine->execlists.tasklet);
+
+       igt_spinner_end(&spin);
+err_free:
+       kfree(rq);
+err_context:
+       for (i = 0; i < width; i++)
+               intel_context_put(ce[i]);
+       kfree(ce);
+err_spin:
+       igt_spinner_fini(&spin);
+err_heartbeat:
+       scheduling_enable(engine);
+       return err;
+}
+
+static int igt_single(struct drm_i915_private *i915,
+                     void (*fn)(struct i915_request *rq,
+                                unsigned long v, unsigned long e))
+{
+       return all_engines(i915, __single_chain, 17, fn);
+}
+
+static int igt_wide(struct drm_i915_private *i915,
+                   void (*fn)(struct i915_request *rq,
+                              unsigned long v, unsigned long e))
+{
+       return all_engines(i915, __wide_chain, 17, fn);
+}
+
+static int igt_inv(struct drm_i915_private *i915,
+                  void (*fn)(struct i915_request *rq,
+                             unsigned long v, unsigned long e))
+{
+       return all_engines(i915, __inv_chain, 17, fn);
+}
+
+static void igt_priority(struct i915_request *rq,
+                        unsigned long v, unsigned long e)
+{
+       i915_request_set_priority(rq, I915_PRIORITY_BARRIER);
+       GEM_BUG_ON(rq_prio(rq) != I915_PRIORITY_BARRIER);
+}
+
+static int igt_priority_single(void *arg)
+{
+       return igt_single(arg, igt_priority);
+}
+
+static int igt_priority_wide(void *arg)
+{
+       return igt_wide(arg, igt_priority);
+}
+
+static int igt_priority_inv(void *arg)
+{
+       return igt_inv(arg, igt_priority);
+}
+
+int i915_scheduler_live_selftests(struct drm_i915_private *i915)
+{
+       static const struct i915_subtest tests[] = {
+               SUBTEST(igt_priority_single),
+               SUBTEST(igt_priority_wide),
+               SUBTEST(igt_priority_inv),
+       };
+
+       return i915_subtests(tests, i915);
+}
+
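+/*
+ * Repeat the chain construction over an increasing series of graph
+ * sizes, stopping early if we run up against the selftest timeout.
+ */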
+static int chains(struct drm_i915_private *i915,
+                 int (*chain)(struct drm_i915_private *i915,
+                              unsigned long length,
+                              void (*fn)(struct i915_request *rq,
+                                         unsigned long v, unsigned long e)),
+                 void (*fn)(struct i915_request *rq,
+                            unsigned long v, unsigned long e))
+{
+       unsigned long x[] = { 1, 4, 16, 64, 128, 256, 512, 1024, 4096 };
+       int i, err;
+
+       for (i = 0; i < ARRAY_SIZE(x); i++) {
+               IGT_TIMEOUT(end_time);
+
+               err = chain(i915, x[i], fn);
+               if (err)
+                       return err;
+
+               if (__igt_timeout(end_time, NULL))
+                       break;
+       }
+
+       return 0;
+}
+
+static int single_chain(struct drm_i915_private *i915,
+                       unsigned long length,
+                       void (*fn)(struct i915_request *rq,
+                                  unsigned long v, unsigned long e))
+{
+       return first_engine(i915, __single_chain, length, fn);
+}
+
+static int single(struct drm_i915_private *i915,
+                 void (*fn)(struct i915_request *rq,
+                            unsigned long v, unsigned long e))
+{
+       return chains(i915, single_chain, fn);
+}
+
+static int wide_chain(struct drm_i915_private *i915,
+                     unsigned long width,
+                     void (*fn)(struct i915_request *rq,
+                                unsigned long v, unsigned long e))
+{
+       return first_engine(i915, __wide_chain, width, fn);
+}
+
+static int wide(struct drm_i915_private *i915,
+               void (*fn)(struct i915_request *rq,
+                          unsigned long v, unsigned long e))
+{
+       return chains(i915, wide_chain, fn);
+}
+
+static int inv_chain(struct drm_i915_private *i915,
+                    unsigned long width,
+                    void (*fn)(struct i915_request *rq,
+                               unsigned long v, unsigned long e))
+{
+       return first_engine(i915, __inv_chain, width, fn);
+}
+
+static int inv(struct drm_i915_private *i915,
+              void (*fn)(struct i915_request *rq,
+                         unsigned long v, unsigned long e))
+{
+       return chains(i915, inv_chain, fn);
+}
+
+static void report(const char *what, unsigned long v, unsigned long e, u64 dt)
+{
+       pr_info("(%4lu, %7lu), %s:%10lluns\n", v, e, what, dt);
+}
+
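+/*
+ * Time a single set-priority pass over the graph, with preemption
+ * disabled so the measurement is not inflated by being scheduled out.
+ */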
+static u64 __set_priority(struct i915_request *rq, int prio)
+{
+       u64 dt;
+
+       preempt_disable();
+       dt = ktime_get_raw_fast_ns();
+       i915_request_set_priority(rq, prio);
+       dt = ktime_get_raw_fast_ns() - dt;
+       preempt_enable();
+
+       return dt;
+}
+
+static void set_priority(struct i915_request *rq,
+                        unsigned long v, unsigned long e)
+{
+       report("set-priority", v, e, __set_priority(rq, I915_PRIORITY_BARRIER));
+}
+
+static int single_priority(void *arg)
+{
+       return single(arg, set_priority);
+}
+
+static int wide_priority(void *arg)
+{
+       return wide(arg, set_priority);
+}
+
+static int inv_priority(void *arg)
+{
+       return inv(arg, set_priority);
+}
+
+int i915_scheduler_perf_selftests(struct drm_i915_private *i915)
+{
+       static const struct i915_subtest tests[] = {
+               SUBTEST(single_priority),
+               SUBTEST(wide_priority),
+               SUBTEST(inv_priority),
+       };
+
+       return i915_subtests(tests, i915);
+}
-- 
2.20.1
