[Intel-gfx] [PATCH v2] drm/i915/selftests: Exercise context switching in parallel

2019-09-30 Thread Chris Wilson
We currently test context switching on each engine as a basic stress
test (just verifying that nothing explodes if we execute 2 requests from
different contexts sequentially). What we have not tested is what
happens if we try to do so on all available engines simultaneously,
putting our SW and the HW under maximal stress.

Signed-off-by: Chris Wilson 
Cc: Mika Kuoppala 
---
Keep struct_mutex the outer lock while it still exists
---
 .../drm/i915/gem/selftests/i915_gem_context.c | 203 ++
 1 file changed, 203 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index dc25bcc3e372..8325c7329dc7 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -156,6 +156,208 @@ static int live_nop_switch(void *arg)
return err;
 }
 
+struct parallel_switch {
+   struct task_struct *tsk;
+   struct intel_context *ce[2];
+};
+
+static int __live_parallel_switch1(void *data)
+{
+   struct parallel_switch *arg = data;
+   struct drm_i915_private *i915 = arg->ce[0]->engine->i915;
+   IGT_TIMEOUT(end_time);
+   unsigned long count;
+
+   count = 0;
+   do {
+   struct i915_request *rq;
+   int err;
+
+   mutex_lock(&i915->drm.struct_mutex);
+   rq = i915_request_create(arg->ce[0]);
+   if (IS_ERR(rq)) {
+   mutex_unlock(&i915->drm.struct_mutex);
+   return PTR_ERR(rq);
+   }
+
+   i915_request_add(rq);
+   mutex_unlock(&i915->drm.struct_mutex);
+
+   mutex_lock(&i915->drm.struct_mutex);
+   rq = i915_request_create(arg->ce[1]);
+   if (IS_ERR(rq)) {
+   mutex_unlock(&i915->drm.struct_mutex);
+   return PTR_ERR(rq);
+   }
+
+   i915_request_get(rq);
+   i915_request_add(rq);
+   mutex_unlock(&i915->drm.struct_mutex);
+
+   err = 0;
+   if (i915_request_wait(rq, 0, HZ / 5) < 0)
+   err = -ETIME;
+   i915_request_put(rq);
+   if (err)
+   return err;
+
+   count++;
+   } while (!__igt_timeout(end_time, NULL));
+
+   pr_info("%s: %lu switches (sync)\n", arg->ce[0]->engine->name, count);
+   return 0;
+}
+
+static int __live_parallel_switchN(void *data)
+{
+   struct parallel_switch *arg = data;
+   struct drm_i915_private *i915 = arg->ce[0]->engine->i915;
+   IGT_TIMEOUT(end_time);
+   unsigned long count;
+
+   count = 0;
+   do {
+   struct i915_request *rq;
+
+   mutex_lock(&i915->drm.struct_mutex);
+   rq = i915_request_create(arg->ce[0]);
+   if (IS_ERR(rq)) {
+   mutex_unlock(&i915->drm.struct_mutex);
+   return PTR_ERR(rq);
+   }
+
+   i915_request_add(rq);
+   mutex_unlock(&i915->drm.struct_mutex);
+
+   mutex_lock(&i915->drm.struct_mutex);
+   rq = i915_request_create(arg->ce[1]);
+   if (IS_ERR(rq)) {
+   mutex_unlock(&i915->drm.struct_mutex);
+   return PTR_ERR(rq);
+   }
+
+   i915_request_add(rq);
+   mutex_unlock(&i915->drm.struct_mutex);
+
+   count++;
+   } while (!__igt_timeout(end_time, NULL));
+
+   pr_info("%s: %lu switches (many)\n", arg->ce[0]->engine->name, count);
+   return 0;
+}
+
+static int live_parallel_switch(void *arg)
+{
+   struct drm_i915_private *i915 = arg;
+   static int (* const func[])(void *arg) = {
+   __live_parallel_switch1,
+   __live_parallel_switchN,
+   NULL,
+   };
+   struct i915_gem_context *ctx[2];
+   struct parallel_switch *data;
+   int (* const *fn)(void *arg);
+   struct drm_file *file;
+   int err = 0;
+   int n;
+
+   /*
+* Check we can process switches on all engines simultaneously.
+*/
+
+   if (!DRIVER_CAPS(i915)->has_logical_contexts)
+   return 0;
+
+   file = mock_file(i915);
+   if (IS_ERR(file))
+   return PTR_ERR(file);
+
+   data = kcalloc(I915_NUM_ENGINES, sizeof(*data), GFP_KERNEL);
+   if (!data)
+   return -ENOMEM;
+
+   for (n = 0; n < ARRAY_SIZE(ctx); n++) {
+   struct i915_gem_engines_iter it;
+   struct intel_context *ce;
+
+   mutex_lock(&i915->drm.struct_mutex);
+   ctx[n] = live_context(i915, file);
+   if (IS_ERR(ctx[n])) {
+   err = PTR_ERR(ctx[n]);
+   goto out_locked;
+   }
+
+   for_each_gem_engine(ce,
+ 
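For context, the parallelism in this test comes from the tsk member of struct
parallel_switch: each engine's pair of pinned contexts is handed to its own
kernel thread, which runs the selected __live_parallel_switch* body until the
IGT timeout expires. A minimal sketch of that driver pattern, hypothetical code
rather than the patch text, assuming the usual kthread_run()/kthread_stop()
selftest idiom (the thread name and the count variable are illustrative):

	/* Hypothetical sketch: spawn one thread per populated engine slot. */
	for (n = 0; n < count; n++) {
		if (!data[n].ce[0])
			continue;

		data[n].tsk = kthread_run(*fn, &data[n],
					  "igt/parallel:%s",
					  data[n].ce[0]->engine->name);
		if (IS_ERR(data[n].tsk)) {
			err = PTR_ERR(data[n].tsk);
			break;
		}
		get_task_struct(data[n].tsk);
	}

	yield(); /* let the threads start before we begin stopping them */

	for (n = 0; n < count; n++) {
		int status;

		if (IS_ERR_OR_NULL(data[n].tsk))
			continue;

		/* kthread_stop() collects the thread function's return value */
		status = kthread_stop(data[n].tsk);
		if (status && !err)
			err = status;
		put_task_struct(data[n].tsk);
	}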

Re: [Intel-gfx] [PATCH v2] drm/i915/selftests: Exercise context switching in parallel

2019-09-30 Thread Tvrtko Ursulin


On 30/09/2019 12:31, Chris Wilson wrote:

We currently test context switching on each engine as a basic stress
test (just verifying that nothing explodes if we execute 2 requests from
different contexts sequentially). What we have not tested is what
happens if we try to do so on all available engines simultaneously,
putting our SW and the HW under maximal stress.

Signed-off-by: Chris Wilson 
Cc: Mika Kuoppala 
---
Keep struct_mutex the outer lock while it still exists
---
  .../drm/i915/gem/selftests/i915_gem_context.c | 203 ++
  1 file changed, 203 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index dc25bcc3e372..8325c7329dc7 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -156,6 +156,208 @@ static int live_nop_switch(void *arg)
return err;
  }
  
+struct parallel_switch {

+   struct task_struct *tsk;
+   struct intel_context *ce[2];
+};
+
+static int __live_parallel_switch1(void *data)
+{
+   struct parallel_switch *arg = data;
+   struct drm_i915_private *i915 = arg->ce[0]->engine->i915;
+   IGT_TIMEOUT(end_time);
+   unsigned long count;
+
+   count = 0;
+   do {
+   struct i915_request *rq;
+   int err;
+
+   mutex_lock(&i915->drm.struct_mutex);
+   rq = i915_request_create(arg->ce[0]);
+   if (IS_ERR(rq)) {
+   mutex_unlock(&i915->drm.struct_mutex);
+   return PTR_ERR(rq);
+   }
+
+   i915_request_add(rq);
+   mutex_unlock(&i915->drm.struct_mutex);
+
+   mutex_lock(&i915->drm.struct_mutex);


unlock-lock! :) I guess in anticipation of removing it all.


+   rq = i915_request_create(arg->ce[1]);
+   if (IS_ERR(rq)) {
+   mutex_unlock(&i915->drm.struct_mutex);
+   return PTR_ERR(rq);
+   }
+
+   i915_request_get(rq);
+   i915_request_add(rq);
+   mutex_unlock(&i915->drm.struct_mutex);
+
+   err = 0;
+   if (i915_request_wait(rq, 0, HZ / 5) < 0)
+   err = -ETIME;
+   i915_request_put(rq);
+   if (err)
+   return err;
+
+   count++;
+   } while (!__igt_timeout(end_time, NULL));
+
+   pr_info("%s: %lu switches (sync)\n", arg->ce[0]->engine->name, count);
+   return 0;
+}
+
+static int __live_parallel_switchN(void *data)
+{
+   struct parallel_switch *arg = data;
+   struct drm_i915_private *i915 = arg->ce[0]->engine->i915;
+   IGT_TIMEOUT(end_time);
+   unsigned long count;
+
+   count = 0;
+   do {
+   struct i915_request *rq;
+
+   mutex_lock(&i915->drm.struct_mutex);
+   rq = i915_request_create(arg->ce[0]);
+   if (IS_ERR(rq)) {
+   mutex_unlock(&i915->drm.struct_mutex);
+   return PTR_ERR(rq);
+   }
+
+   i915_request_add(rq);
+   mutex_unlock(&i915->drm.struct_mutex);
+
+   mutex_lock(&i915->drm.struct_mutex);
+   rq = i915_request_create(arg->ce[1]);
+   if (IS_ERR(rq)) {
+   mutex_unlock(&i915->drm.struct_mutex);
+   return PTR_ERR(rq);
+   }
+
+   i915_request_add(rq);
+   mutex_unlock(&i915->drm.struct_mutex);
+
+   count++;
+   } while (!__igt_timeout(end_time, NULL));
+
+   pr_info("%s: %lu switches (many)\n", arg->ce[0]->engine->name, count);
+   return 0;
+}
+
+static int live_parallel_switch(void *arg)
+{
+   struct drm_i915_private *i915 = arg;
+   static int (* const func[])(void *arg) = {
+   __live_parallel_switch1,
+   __live_parallel_switchN,
+   NULL,
+   };
+   struct i915_gem_context *ctx[2];
+   struct parallel_switch *data;
+   int (* const *fn)(void *arg);
+   struct drm_file *file;
+   int err = 0;
+   int n;
+
+   /*
+* Check we can process switches on all engines simultaneously.
+*/
+
+   if (!DRIVER_CAPS(i915)->has_logical_contexts)
+   return 0;
+
+   file = mock_file(i915);
+   if (IS_ERR(file))
+   return PTR_ERR(file);
+
+   data = kcalloc(I915_NUM_ENGINES, sizeof(*data), GFP_KERNEL);


There is a little bit of mixing up I915_NUM_ENGINES and gem engines 
(which contains the num_engines field) in this function.


I think it would be better to limit to one - so maybe get the count from 
gem engines? It can't change during selftest so don't have to have them 
locked for the whole time.
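
A minimal sketch of that suggestion, hypothetical code that assumes a context
is already available to read the count from and uses the num_engines field
mentioned above:

	struct i915_gem_engines *e;
	unsigned int count;

	/* The engine count cannot change during a selftest, so sample it once. */
	e = i915_gem_context_lock_engines(ctx[0]);
	count = e->num_engines;
	i915_gem_context_unlock_engines(ctx[0]);

	data = kcalloc(count, sizeof(*data), GFP_KERNEL);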



+   if (!data)


mock_file_free


+   return -ENOMEM;
+
+ 
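
A minimal sketch of the error path being asked for here, hypothetical code
assuming the mock_file_free() helper takes the device and file as in the other
live selftests:

	data = kcalloc(I915_NUM_ENGINES, sizeof(*data), GFP_KERNEL);
	if (!data) {
		mock_file_free(i915, file); /* don't leak the mock file on error */
		return -ENOMEM;
	}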

Re: [Intel-gfx] [PATCH v2] drm/i915/selftests: Exercise context switching in parallel

2019-09-30 Thread Chris Wilson
Quoting Tvrtko Ursulin (2019-09-30 14:47:26)
> 
> On 30/09/2019 12:31, Chris Wilson wrote:
> > +static int live_parallel_switch(void *arg)
> > +{
> > + struct drm_i915_private *i915 = arg;
> > + static int (* const func[])(void *arg) = {
> > + __live_parallel_switch1,
> > + __live_parallel_switchN,
> > + NULL,
> > + };
> > + struct i915_gem_context *ctx[2];
> > + struct parallel_switch *data;
> > + int (* const *fn)(void *arg);
> > + struct drm_file *file;
> > + int err = 0;
> > + int n;
> > +
> > + /*
> > +  * Check we can process switches on all engines simultaneously.
> > +  */
> > +
> > + if (!DRIVER_CAPS(i915)->has_logical_contexts)
> > + return 0;
> > +
> > + file = mock_file(i915);
> > + if (IS_ERR(file))
> > + return PTR_ERR(file);
> > +
> > + data = kcalloc(I915_NUM_ENGINES, sizeof(*data), GFP_KERNEL);
> 
> There is a little bit of mixing up I915_NUM_ENGINES and gem engines 
> (which contains the num_engines field) in this function.
> 
> I think it would be better to limit to one - so maybe get the count from 
> gem engines? It can't change during selftest so don't have to have them 
> locked for the whole time.
> 
> > + if (!data)
> 
> mock_file_free
> 
> > + return -ENOMEM;
> > +
> > + for (n = 0; n < ARRAY_SIZE(ctx); n++) {
> > + struct i915_gem_engines_iter it;
> > + struct intel_context *ce;
> > +
> > + mutex_lock(&i915->drm.struct_mutex);
> > + ctx[n] = live_context(i915, file);
> > + if (IS_ERR(ctx[n])) {
> > + err = PTR_ERR(ctx[n]);
> > + goto out_locked;
> > + }
> > +
> > + for_each_gem_engine(ce,
> > + i915_gem_context_lock_engines(ctx[n]), it) {
> > + err = intel_context_pin(ce);
> > + if (err) {
> > + i915_gem_context_unlock_engines(ctx[n]);
> > + goto out_locked;
> > + }
> > + data[ce->engine->legacy_idx].ce[n] = ce;
> 
> IMHO a bit confusing to use legacy_idx - makes it sound like there is 
> some significance to the legacy part so why not just use engine->id?

Default engine list with legacy_idx is nice and linear, with a cap of
I915_NUM_ENGINES.

Ok, I have a weirder plan...
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx