Re: [Intel-gfx] [PATCH 05/19] drm/i915/selftests: Verify we can perform resets from atomic context

2018-12-13 Thread Chris Wilson
Quoting Tvrtko Ursulin (2018-12-13 08:28:00)
> 
> On 12/12/2018 13:41, Chris Wilson wrote:
> > +static int igt_atomic_reset(void *arg)
> > +{
> > + static const struct atomic_section phases[] = {
> > + { "preempt", __preempt_begin, __preempt_end },
> > + { "softirq", __softirq_begin, __softirq_end },
> > + { "hardirq", __hardirq_begin, __hardirq_end },
> > + { }
> > + };
> > + struct drm_i915_private *i915 = arg;
> > + int err = 0;
> > +
> > + /* Check that the resets are usable from atomic context */
> > +
> > + if (USES_GUC_SUBMISSION(i915))
> > + return 0; /* guc is dead; long live the guc */
> > +
> > + igt_global_reset_lock(i915);
> > + mutex_lock(&i915->drm.struct_mutex);
> > + intel_runtime_pm_get(i915);
> > +
> > + /* Flush any requests before we get started and check basics */
> > + force_reset(i915);
> > + if (i915_terminally_wedged(&i915->gpu_error))
> > + goto unlock;
> > +
> > + if (intel_has_reset_engine(i915)) {
> 
> This check could/should go earlier.

My plan is to add a device reset test here later.
-Chris
___
Intel-gfx mailing list
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 05/19] drm/i915/selftests: Verify we can perform resets from atomic context

2018-12-13 Thread Tvrtko Ursulin


On 12/12/2018 13:41, Chris Wilson wrote:

We currently require that our per-engine reset can be called from any
context, even hardirq, and in the future wish to perform the device
reset without holding struct_mutex (which requires some lockless
shenanigans that demand the lowlevel intel_reset_gpu() be able to be
used in atomic context). Test that we meet the current requirements by
calling i915_reset_engine() from under various atomic contexts.

Signed-off-by: Chris Wilson 
Cc: Mika Kuoppala 
---
  .../gpu/drm/i915/selftests/intel_hangcheck.c  | 172 ++
  1 file changed, 172 insertions(+)

diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c 
b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
index 65c318d14077..5f2f67b149af 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@@ -1470,6 +1470,177 @@ static int igt_handle_error(void *arg)
return err;
  }
  
+static void __preempt_begin(void)
+{
+   preempt_disable();
+}
+
+static void __preempt_end(void)
+{
+   preempt_enable();
+}
+
+static void __softirq_begin(void)
+{
+   local_bh_disable();
+}
+
+static void __softirq_end(void)
+{
+   local_bh_enable();
+}
+
+static void __hardirq_begin(void)
+{
+   local_irq_disable();
+}
+
+static void __hardirq_end(void)
+{
+   local_irq_enable();
+}
+
+struct atomic_section {
+   const char *name;
+   void (*critical_section_begin)(void);
+   void (*critical_section_end)(void);
+};
+
+static int __igt_atomic_reset_engine(struct intel_engine_cs *engine,
+const struct atomic_section *p,
+const char *mode)
+{
+   struct tasklet_struct * const t = &engine->execlists.tasklet;
+   int err;
+
+   GEM_TRACE("i915_reset_engine(%s:%s) under %s\n",
+ engine->name, mode, p->name);
+
+   tasklet_disable_nosync(t);
+   p->critical_section_begin();
+
+   err = i915_reset_engine(engine, NULL);
+
+   p->critical_section_end();
+   tasklet_enable(t);
+
+   if (err)
+   pr_err("i915_reset_engine(%s:%s) failed under %s\n",
+  engine->name, mode, p->name);
+
+   return err;
+}
+
+static int igt_atomic_reset_engine(struct intel_engine_cs *engine,
+  const struct atomic_section *p)
+{
+   struct drm_i915_private *i915 = engine->i915;
+   struct i915_request *rq;
+   struct hang h;
+   int err;
+
+   err = __igt_atomic_reset_engine(engine, p, "idle");
+   if (err)
+   return err;
+
+   err = hang_init(&h, i915);
+   if (err)
+   return err;
+
+   rq = hang_create_request(&h, engine);
+   if (IS_ERR(rq)) {
+   err = PTR_ERR(rq);
+   goto out;
+   }
+
+   i915_request_get(rq);
+   i915_request_add(rq);
+
+   if (wait_until_running(&h, rq)) {
+   err = __igt_atomic_reset_engine(engine, p, "active");
+   } else {
+   pr_err("%s(%s): Failed to start request %llx, at %x\n",
+  __func__, engine->name,
+  rq->fence.seqno, hws_seqno(&h, rq));
+   err = -EIO;
+   }
+
+   if (err == 0) {
+   struct igt_wedge_me w;
+
+   igt_wedge_on_timeout(&w, i915, HZ / 20 /* 50ms timeout*/)
+   i915_request_wait(rq,
+ I915_WAIT_LOCKED,
+ MAX_SCHEDULE_TIMEOUT);
+   if (i915_terminally_wedged(&i915->gpu_error))
+   err = -EIO;
+   }
+
+   i915_request_put(rq);
+out:
+   hang_fini(&h);
+   return err;
+}
+
+static void force_reset(struct drm_i915_private *i915)
+{
+   i915_gem_set_wedged(i915);
+   set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags);
+   i915_reset(i915, 0, NULL);
+}
+
+static int igt_atomic_reset(void *arg)
+{
+   static const struct atomic_section phases[] = {
+   { "preempt", __preempt_begin, __preempt_end },
+   { "softirq", __softirq_begin, __softirq_end },
+   { "hardirq", __hardirq_begin, __hardirq_end },
+   { }
+   };
+   struct drm_i915_private *i915 = arg;
+   int err = 0;
+
+   /* Check that the resets are usable from atomic context */
+
+   if (USES_GUC_SUBMISSION(i915))
+   return 0; /* guc is dead; long live the guc */
+
+   igt_global_reset_lock(i915);
+   mutex_lock(&i915->drm.struct_mutex);
+   intel_runtime_pm_get(i915);
+
+   /* Flush any requests before we get started and check basics */
+   force_reset(i915);
+   if (i915_terminally_wedged(&i915->gpu_error))
+   goto unlock;
+
+   if (intel_has_reset_engine(i915)) {


This check could/should go earlier.


+   struct intel_engine_cs *engine;
+   enum intel_engine_id id;

[Intel-gfx] [PATCH 05/19] drm/i915/selftests: Verify we can perform resets from atomic context

2018-12-12 Thread Chris Wilson
We currently require that our per-engine reset can be called from any
context, even hardirq, and in the future wish to perform the device
reset without holding struct_mutex (which requires some lockless
shenanigans that demand the lowlevel intel_reset_gpu() be able to be
used in atomic context). Test that we meet the current requirements by
calling i915_reset_engine() from under various atomic contexts.

Signed-off-by: Chris Wilson 
Cc: Mika Kuoppala 
---
 .../gpu/drm/i915/selftests/intel_hangcheck.c  | 172 ++
 1 file changed, 172 insertions(+)

diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c 
b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
index 65c318d14077..5f2f67b149af 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@@ -1470,6 +1470,177 @@ static int igt_handle_error(void *arg)
return err;
 }
 
+static void __preempt_begin(void)
+{
+   preempt_disable();
+}
+
+static void __preempt_end(void)
+{
+   preempt_enable();
+}
+
+static void __softirq_begin(void)
+{
+   local_bh_disable();
+}
+
+static void __softirq_end(void)
+{
+   local_bh_enable();
+}
+
+static void __hardirq_begin(void)
+{
+   local_irq_disable();
+}
+
+static void __hardirq_end(void)
+{
+   local_irq_enable();
+}
+
+struct atomic_section {
+   const char *name;
+   void (*critical_section_begin)(void);
+   void (*critical_section_end)(void);
+};
+
+static int __igt_atomic_reset_engine(struct intel_engine_cs *engine,
+const struct atomic_section *p,
+const char *mode)
+{
+   struct tasklet_struct * const t = &engine->execlists.tasklet;
+   int err;
+
+   GEM_TRACE("i915_reset_engine(%s:%s) under %s\n",
+ engine->name, mode, p->name);
+
+   tasklet_disable_nosync(t);
+   p->critical_section_begin();
+
+   err = i915_reset_engine(engine, NULL);
+
+   p->critical_section_end();
+   tasklet_enable(t);
+
+   if (err)
+   pr_err("i915_reset_engine(%s:%s) failed under %s\n",
+  engine->name, mode, p->name);
+
+   return err;
+}
+
+static int igt_atomic_reset_engine(struct intel_engine_cs *engine,
+  const struct atomic_section *p)
+{
+   struct drm_i915_private *i915 = engine->i915;
+   struct i915_request *rq;
+   struct hang h;
+   int err;
+
+   err = __igt_atomic_reset_engine(engine, p, "idle");
+   if (err)
+   return err;
+
+   err = hang_init(&h, i915);
+   if (err)
+   return err;
+
+   rq = hang_create_request(&h, engine);
+   if (IS_ERR(rq)) {
+   err = PTR_ERR(rq);
+   goto out;
+   }
+
+   i915_request_get(rq);
+   i915_request_add(rq);
+
+   if (wait_until_running(&h, rq)) {
+   err = __igt_atomic_reset_engine(engine, p, "active");
+   } else {
+   pr_err("%s(%s): Failed to start request %llx, at %x\n",
+  __func__, engine->name,
+  rq->fence.seqno, hws_seqno(&h, rq));
+   err = -EIO;
+   }
+
+   if (err == 0) {
+   struct igt_wedge_me w;
+
+   igt_wedge_on_timeout(&w, i915, HZ / 20 /* 50ms timeout*/)
+   i915_request_wait(rq,
+ I915_WAIT_LOCKED,
+ MAX_SCHEDULE_TIMEOUT);
+   if (i915_terminally_wedged(&i915->gpu_error))
+   err = -EIO;
+   }
+
+   i915_request_put(rq);
+out:
+   hang_fini(&h);
+   return err;
+}
+
+static void force_reset(struct drm_i915_private *i915)
+{
+   i915_gem_set_wedged(i915);
+   set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags);
+   i915_reset(i915, 0, NULL);
+}
+
+static int igt_atomic_reset(void *arg)
+{
+   static const struct atomic_section phases[] = {
+   { "preempt", __preempt_begin, __preempt_end },
+   { "softirq", __softirq_begin, __softirq_end },
+   { "hardirq", __hardirq_begin, __hardirq_end },
+   { }
+   };
+   struct drm_i915_private *i915 = arg;
+   int err = 0;
+
+   /* Check that the resets are usable from atomic context */
+
+   if (USES_GUC_SUBMISSION(i915))
+   return 0; /* guc is dead; long live the guc */
+
+   igt_global_reset_lock(i915);
+   mutex_lock(&i915->drm.struct_mutex);
+   intel_runtime_pm_get(i915);
+
+   /* Flush any requests before we get started and check basics */
+   force_reset(i915);
+   if (i915_terminally_wedged(&i915->gpu_error))
+   goto unlock;
+
+   if (intel_has_reset_engine(i915)) {
+   struct intel_engine_cs *engine;
+   enum intel_engine_id id;
+
+   for_each_engine(engine, i915, id) {
+