Re: [Intel-gfx] [PATCH] drm/i915/selftests: Exercise CS TLB invalidation

2019-09-19 Thread Chris Wilson
Quoting Mika Kuoppala (2019-09-19 14:39:59)
> Chris Wilson  writes:
> 
> > Quoting Mika Kuoppala (2019-09-19 13:57:45)
> >> Chris Wilson  writes:
> >> > + vma = i915_vma_instance(out, vm, NULL);
> >> > + if (IS_ERR(vma)) {
> >> > + err = PTR_ERR(vma);
> >> > + goto out_batch;
> >> > + }
> >> > +
> >> > + err = i915_vma_pin(vma, 0, 0,
> >> > +PIN_USER |
> >> > +PIN_OFFSET_FIXED |
> >> > +(vm->total - PAGE_SIZE));
> >> > + if (err)
> >> > + goto out_out;
> >> 
> >> out_put?
> >
> > Joonas likes out: for normal exit paths that double for error handling.
> 
> Ok. Fine with me. I just like the last part to describe what the first part
> of onion out does. Don't have to so much scroll back and forth
> in editor.

Oh, I didn't notice I had a typo. I thought it was out_put!
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH] drm/i915/selftests: Exercise CS TLB invalidation

2019-09-19 Thread Mika Kuoppala
Chris Wilson  writes:

> Quoting Mika Kuoppala (2019-09-19 13:57:45)
>> Chris Wilson  writes:
>> 
>> > Check that we are correctly invalidating the TLB at the start of a
>> > batch after updating the GTT.
>> >
>> > Signed-off-by: Chris Wilson 
>> > Cc: Mika Kuoppala 
>> > Cc: Daniele Ceraolo Spurio 
>> > ---
>> >  drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 227 ++
>> >  1 file changed, 227 insertions(+)
>> >
>> > diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c 
>> > b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
>> > index 598c18d10640..f8709b332bd7 100644
>> > --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
>> > +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
>> > @@ -25,13 +25,16 @@
>> >  #include 
>> >  #include 
>> >  
>> > +#include "gem/i915_gem_context.h"
>> >  #include "gem/selftests/mock_context.h"
>> > +#include "gt/intel_context.h"
>> >  
>> >  #include "i915_random.h"
>> >  #include "i915_selftest.h"
>> >  
>> >  #include "mock_drm.h"
>> >  #include "mock_gem_device.h"
>> > +#include "igt_flush_test.h"
>> >  
>> >  static void cleanup_freed_objects(struct drm_i915_private *i915)
>> >  {
>> > @@ -1705,6 +1708,229 @@ int i915_gem_gtt_mock_selftests(void)
>> >   return err;
>> >  }
>> >  
>> > +static int context_sync(struct intel_context *ce)
>> > +{
>> > + struct i915_request *rq;
>> > + long timeout;
>> > +
>> > + rq = intel_context_create_request(ce);
>> > + if (IS_ERR(rq))
>> > + return PTR_ERR(rq);
>> > +
>> > + i915_request_get(rq);
>> > + i915_request_add(rq);
>> > +
>> > + timeout = i915_request_wait(rq, 0, HZ / 5);
>> > + i915_request_put(rq);
>> > +
>> > + return timeout < 0 ? -EIO : 0;
>> > +}
>> > +
>> > +static int submit_batch(struct intel_context *ce, u64 addr)
>> > +{
>> > + struct i915_request *rq;
>> > + int err;
>> > +
>> > + rq = intel_context_create_request(ce);
>> > + if (IS_ERR(rq))
>> > + return PTR_ERR(rq);
>> > +
>> > + err = 0;
>> > + if (rq->engine->emit_init_breadcrumb) /* detect a hang */
>> 
>> for seqno write?
>
> If we expect an initial breadcrumb, we use it during reset to identify
> if we are inside a payload (as opposed to being inside the semaphore
> wait). So we need to emit the breadcrumb if we may hang in our batch.
>
>> > + err = rq->engine->emit_init_breadcrumb(rq);
>> > + if (err == 0)
>> > + err = rq->engine->emit_bb_start(rq, addr, 0, 0);
>> > +
>> 
>> In context_sync part we grabbed a reference. In here we
>> don't. I don't see how we can get away without it even
>> if we don't wait in here.
>
> Hmm, I suppose you can argue that we do now have a later deref in the
> spinner. That wasn't there before...
>
>> > + vma = i915_vma_instance(out, vm, NULL);
>> > + if (IS_ERR(vma)) {
>> > + err = PTR_ERR(vma);
>> > + goto out_batch;
>> > + }
>> > +
>> > + err = i915_vma_pin(vma, 0, 0,
>> > +PIN_USER |
>> > +PIN_OFFSET_FIXED |
>> > +(vm->total - PAGE_SIZE));
>> > + if (err)
>> > + goto out_out;
>> 
>> out_put?
>
> Joonas likes out: for normal exit paths that double for error handling.

Ok. Fine with me. I just like the last part to describe what the first part
of onion out does. Don't have to so much scroll back and forth
in editor.

>
>> Oh and we don't have to do anything with the instance on
>> error paths?
>
> No. The vma does not yet have an independent lifetime from the object
> (they are all owned by objects currently).
>
>> > + /* Replace the TLB with target batches */
>> > + for (i = 0; i < count; i++) {
>> > + u32 *cs = batch + i * 64 / sizeof(*cs);
>> > + u64 addr;
>> > +
>> > + vma->node.start = offset + i * PAGE_SIZE;
>> 
>> on previous loop, we have now primed the pte to tlb in here?
>
> We're using the same PTE as before, in the expectation that the TLB
> still contains that lookup.
>
>> > + vm->insert_entries(vm, vma, I915_CACHE_NONE, 
>> > 0);
>> 
>> ..now changing in it here...
>> 
>> > +
>> > + addr = vma->node.start + i * 64;
>> > + cs[4] = MI_NOOP;
>> > + cs[6] = lower_32_bits(addr);
>> > + cs[7] = upper_32_bits(addr);
>> > + wmb();
>> > +
>> > + err = submit_batch(ce, addr);
>> 
>> in hope that with this submission hardware will see the old one and
>> completely miss the store+bb start?
>
> The hope is we see the right page of course! The test is to detect when
> it sees the old page instead.

Right, I guess it just depends who is hoping wrt to outcome =)

>  
>> Perhaps rewiring a more dummy emit only to prove this case
>> is pushing it.
>> 
>> 

Re: [Intel-gfx] [PATCH] drm/i915/selftests: Exercise CS TLB invalidation

2019-09-19 Thread Chris Wilson
Quoting Mika Kuoppala (2019-09-19 13:57:45)
> Chris Wilson  writes:
> 
> > Check that we are correctly invalidating the TLB at the start of a
> > batch after updating the GTT.
> >
> > Signed-off-by: Chris Wilson 
> > Cc: Mika Kuoppala 
> > Cc: Daniele Ceraolo Spurio 
> > ---
> >  drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 227 ++
> >  1 file changed, 227 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c 
> > b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> > index 598c18d10640..f8709b332bd7 100644
> > --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> > +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> > @@ -25,13 +25,16 @@
> >  #include 
> >  #include 
> >  
> > +#include "gem/i915_gem_context.h"
> >  #include "gem/selftests/mock_context.h"
> > +#include "gt/intel_context.h"
> >  
> >  #include "i915_random.h"
> >  #include "i915_selftest.h"
> >  
> >  #include "mock_drm.h"
> >  #include "mock_gem_device.h"
> > +#include "igt_flush_test.h"
> >  
> >  static void cleanup_freed_objects(struct drm_i915_private *i915)
> >  {
> > @@ -1705,6 +1708,229 @@ int i915_gem_gtt_mock_selftests(void)
> >   return err;
> >  }
> >  
> > +static int context_sync(struct intel_context *ce)
> > +{
> > + struct i915_request *rq;
> > + long timeout;
> > +
> > + rq = intel_context_create_request(ce);
> > + if (IS_ERR(rq))
> > + return PTR_ERR(rq);
> > +
> > + i915_request_get(rq);
> > + i915_request_add(rq);
> > +
> > + timeout = i915_request_wait(rq, 0, HZ / 5);
> > + i915_request_put(rq);
> > +
> > + return timeout < 0 ? -EIO : 0;
> > +}
> > +
> > +static int submit_batch(struct intel_context *ce, u64 addr)
> > +{
> > + struct i915_request *rq;
> > + int err;
> > +
> > + rq = intel_context_create_request(ce);
> > + if (IS_ERR(rq))
> > + return PTR_ERR(rq);
> > +
> > + err = 0;
> > + if (rq->engine->emit_init_breadcrumb) /* detect a hang */
> 
> for seqno write?

If we expect an initial breadcrumb, we use it during reset to identify
if we are inside a payload (as opposed to being inside the semaphore
wait). So we need to emit the breadcrumb if we may hang in our batch.

> > + err = rq->engine->emit_init_breadcrumb(rq);
> > + if (err == 0)
> > + err = rq->engine->emit_bb_start(rq, addr, 0, 0);
> > +
> 
> In context_sync part we grabbed a reference. In here we
> don't. I don't see how we can get away without it even
> if we don't wait in here.

Hmm, I suppose you can argue that we do now have a later deref in the
spinner. That wasn't there before...

> > + vma = i915_vma_instance(out, vm, NULL);
> > + if (IS_ERR(vma)) {
> > + err = PTR_ERR(vma);
> > + goto out_batch;
> > + }
> > +
> > + err = i915_vma_pin(vma, 0, 0,
> > +PIN_USER |
> > +PIN_OFFSET_FIXED |
> > +(vm->total - PAGE_SIZE));
> > + if (err)
> > + goto out_out;
> 
> out_put?

Joonas likes out: for normal exit paths that double for error handling.

> Oh and we don't have to do anything with the instance on
> error paths?

No. The vma does not yet have an independent lifetime from the object
(they are all owned by objects currently).

> > + /* Replace the TLB with target batches */
> > + for (i = 0; i < count; i++) {
> > + u32 *cs = batch + i * 64 / sizeof(*cs);
> > + u64 addr;
> > +
> > + vma->node.start = offset + i * PAGE_SIZE;
> 
> on previous loop, we have now primed the pte to tlb in here?

We're using the same PTE as before, in the expectation that the TLB
still contains that lookup.

> > + vm->insert_entries(vm, vma, I915_CACHE_NONE, 
> > 0);
> 
> ..now changing in it here...
> 
> > +
> > + addr = vma->node.start + i * 64;
> > + cs[4] = MI_NOOP;
> > + cs[6] = lower_32_bits(addr);
> > + cs[7] = upper_32_bits(addr);
> > + wmb();
> > +
> > + err = submit_batch(ce, addr);
> 
> in hope that with this submission hardware will see the old one and
> completely miss the store+bb start?

The hope is we see the right page of course! The test is to detect when
it sees the old page instead.
 
> Perhaps rewiring a more dummy emit only to prove this case
> is pushing it.
> 
> But I am curious to know if you did try it out by removing
> the invalidate on the emits and managed to bring
> out the missing writes?

We can demonstrate it on Tigerlake :)

Indeed it detects the remove of MI_INVALIDATE_TLB elsewhere.
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org

Re: [Intel-gfx] [PATCH] drm/i915/selftests: Exercise CS TLB invalidation

2019-09-19 Thread Mika Kuoppala
Chris Wilson  writes:

> Check that we are correctly invalidating the TLB at the start of a
> batch after updating the GTT.
>
> Signed-off-by: Chris Wilson 
> Cc: Mika Kuoppala 
> Cc: Daniele Ceraolo Spurio 
> ---
>  drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 227 ++
>  1 file changed, 227 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c 
> b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> index 598c18d10640..f8709b332bd7 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> @@ -25,13 +25,16 @@
>  #include 
>  #include 
>  
> +#include "gem/i915_gem_context.h"
>  #include "gem/selftests/mock_context.h"
> +#include "gt/intel_context.h"
>  
>  #include "i915_random.h"
>  #include "i915_selftest.h"
>  
>  #include "mock_drm.h"
>  #include "mock_gem_device.h"
> +#include "igt_flush_test.h"
>  
>  static void cleanup_freed_objects(struct drm_i915_private *i915)
>  {
> @@ -1705,6 +1708,229 @@ int i915_gem_gtt_mock_selftests(void)
>   return err;
>  }
>  
> +static int context_sync(struct intel_context *ce)
> +{
> + struct i915_request *rq;
> + long timeout;
> +
> + rq = intel_context_create_request(ce);
> + if (IS_ERR(rq))
> + return PTR_ERR(rq);
> +
> + i915_request_get(rq);
> + i915_request_add(rq);
> +
> + timeout = i915_request_wait(rq, 0, HZ / 5);
> + i915_request_put(rq);
> +
> + return timeout < 0 ? -EIO : 0;
> +}
> +
> +static int submit_batch(struct intel_context *ce, u64 addr)
> +{
> + struct i915_request *rq;
> + int err;
> +
> + rq = intel_context_create_request(ce);
> + if (IS_ERR(rq))
> + return PTR_ERR(rq);
> +
> + err = 0;
> + if (rq->engine->emit_init_breadcrumb) /* detect a hang */

for seqno write?

> + err = rq->engine->emit_init_breadcrumb(rq);
> + if (err == 0)
> + err = rq->engine->emit_bb_start(rq, addr, 0, 0);
> +

In context_sync part we grabbed a reference. In here we
don't. I don't see how we can get away without it even
if we don't wait in here.

> + i915_request_add(rq);
> +
> + return err;
> +}
> +
> +static int igt_cs_tlb(void *arg)
> +{
> + const unsigned int count = PAGE_SIZE / 64;
> + const unsigned int chunk_size = count * PAGE_SIZE;
> + struct drm_i915_private *i915 = arg;
> + struct drm_i915_gem_object *bbe, *out;
> + struct i915_gem_engines_iter it;
> + struct i915_address_space *vm;
> + struct i915_gem_context *ctx;
> + struct intel_context *ce;
> + struct drm_file *file;
> + struct i915_vma *vma;
> + unsigned int i;
> + u32 *result;
> + u32 *batch;
> + int err = 0;
> +
> + file = mock_file(i915);
> + if (IS_ERR(file))
> + return PTR_ERR(file);
> +
> + mutex_lock(>drm.struct_mutex);
> + ctx = live_context(i915, file);
> + if (IS_ERR(ctx)) {
> + err = PTR_ERR(ctx);
> + goto out_unlock;
> + }
> +
> + vm = ctx->vm;
> + if (!vm)
> + goto out_unlock;
> +
> + bbe = i915_gem_object_create_internal(i915, PAGE_SIZE);
> + if (IS_ERR(bbe)) {
> + err = PTR_ERR(bbe);
> + goto out_unlock;
> + }
> +
> + batch = i915_gem_object_pin_map(bbe, I915_MAP_WC);
> + if (IS_ERR(batch)) {
> + err = PTR_ERR(batch);
> + goto out_bbe;
> + }
> + for (i = 0; i < count; i++) {
> + u32 *cs = batch + i * 64 / sizeof(*cs);
> + u64 addr = (vm->total - PAGE_SIZE) + i * sizeof(u32);
> +
> + GEM_BUG_ON(INTEL_GEN(i915) < 6);
> + cs[0] = MI_STORE_DWORD_IMM_GEN4;
> + if (INTEL_GEN(i915) >= 8) {
> + cs[1] = lower_32_bits(addr);
> + cs[2] = upper_32_bits(addr);
> + cs[3] = i;
> + cs[4] = MI_NOOP;
> + cs[5] = MI_BATCH_BUFFER_START_GEN8;
> + } else {
> + cs[1] = 0;
> + cs[2] = lower_32_bits(addr);
> + cs[3] = i;
> + cs[4] = MI_NOOP;
> + cs[5] = MI_BATCH_BUFFER_START;
> + }
> + }
> +
> + out = i915_gem_object_create_internal(i915, PAGE_SIZE);
> + if (IS_ERR(out)) {
> + err = PTR_ERR(out);
> + goto out_batch;
> + }
> + i915_gem_object_set_cache_coherency(out, I915_CACHING_CACHED);
> +
> + vma = i915_vma_instance(out, vm, NULL);
> + if (IS_ERR(vma)) {
> + err = PTR_ERR(vma);
> + goto out_batch;
> + }
> +
> + err = i915_vma_pin(vma, 0, 0,
> +PIN_USER |
> +PIN_OFFSET_FIXED |
> +(vm->total - PAGE_SIZE));
> + if (err)
> + goto out_out;

out_put?

Oh and we don't have to do anything with the instance on
error paths?

> + 

[Intel-gfx] [PATCH] drm/i915/selftests: Exercise CS TLB invalidation

2019-09-15 Thread Chris Wilson
Check that we are correctly invalidating the TLB at the start of a
batch after updating the GTT.

Signed-off-by: Chris Wilson 
Cc: Mika Kuoppala 
Cc: Daniele Ceraolo Spurio 
---
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 227 ++
 1 file changed, 227 insertions(+)

diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index 598c18d10640..f8709b332bd7 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -25,13 +25,16 @@
 #include 
 #include 
 
+#include "gem/i915_gem_context.h"
 #include "gem/selftests/mock_context.h"
+#include "gt/intel_context.h"
 
 #include "i915_random.h"
 #include "i915_selftest.h"
 
 #include "mock_drm.h"
 #include "mock_gem_device.h"
+#include "igt_flush_test.h"
 
 static void cleanup_freed_objects(struct drm_i915_private *i915)
 {
@@ -1705,6 +1708,229 @@ int i915_gem_gtt_mock_selftests(void)
return err;
 }
 
+static int context_sync(struct intel_context *ce)
+{
+   struct i915_request *rq;
+   long timeout;
+
+   rq = intel_context_create_request(ce);
+   if (IS_ERR(rq))
+   return PTR_ERR(rq);
+
+   i915_request_get(rq);
+   i915_request_add(rq);
+
+   timeout = i915_request_wait(rq, 0, HZ / 5);
+   i915_request_put(rq);
+
+   return timeout < 0 ? -EIO : 0;
+}
+
+static int submit_batch(struct intel_context *ce, u64 addr)
+{
+   struct i915_request *rq;
+   int err;
+
+   rq = intel_context_create_request(ce);
+   if (IS_ERR(rq))
+   return PTR_ERR(rq);
+
+   err = 0;
+   if (rq->engine->emit_init_breadcrumb) /* detect a hang */
+   err = rq->engine->emit_init_breadcrumb(rq);
+   if (err == 0)
+   err = rq->engine->emit_bb_start(rq, addr, 0, 0);
+
+   i915_request_add(rq);
+
+   return err;
+}
+
+static int igt_cs_tlb(void *arg)
+{
+   const unsigned int count = PAGE_SIZE / 64;
+   const unsigned int chunk_size = count * PAGE_SIZE;
+   struct drm_i915_private *i915 = arg;
+   struct drm_i915_gem_object *bbe, *out;
+   struct i915_gem_engines_iter it;
+   struct i915_address_space *vm;
+   struct i915_gem_context *ctx;
+   struct intel_context *ce;
+   struct drm_file *file;
+   struct i915_vma *vma;
+   unsigned int i;
+   u32 *result;
+   u32 *batch;
+   int err = 0;
+
+   file = mock_file(i915);
+   if (IS_ERR(file))
+   return PTR_ERR(file);
+
+   mutex_lock(>drm.struct_mutex);
+   ctx = live_context(i915, file);
+   if (IS_ERR(ctx)) {
+   err = PTR_ERR(ctx);
+   goto out_unlock;
+   }
+
+   vm = ctx->vm;
+   if (!vm)
+   goto out_unlock;
+
+   bbe = i915_gem_object_create_internal(i915, PAGE_SIZE);
+   if (IS_ERR(bbe)) {
+   err = PTR_ERR(bbe);
+   goto out_unlock;
+   }
+
+   batch = i915_gem_object_pin_map(bbe, I915_MAP_WC);
+   if (IS_ERR(batch)) {
+   err = PTR_ERR(batch);
+   goto out_bbe;
+   }
+   for (i = 0; i < count; i++) {
+   u32 *cs = batch + i * 64 / sizeof(*cs);
+   u64 addr = (vm->total - PAGE_SIZE) + i * sizeof(u32);
+
+   GEM_BUG_ON(INTEL_GEN(i915) < 6);
+   cs[0] = MI_STORE_DWORD_IMM_GEN4;
+   if (INTEL_GEN(i915) >= 8) {
+   cs[1] = lower_32_bits(addr);
+   cs[2] = upper_32_bits(addr);
+   cs[3] = i;
+   cs[4] = MI_NOOP;
+   cs[5] = MI_BATCH_BUFFER_START_GEN8;
+   } else {
+   cs[1] = 0;
+   cs[2] = lower_32_bits(addr);
+   cs[3] = i;
+   cs[4] = MI_NOOP;
+   cs[5] = MI_BATCH_BUFFER_START;
+   }
+   }
+
+   out = i915_gem_object_create_internal(i915, PAGE_SIZE);
+   if (IS_ERR(out)) {
+   err = PTR_ERR(out);
+   goto out_batch;
+   }
+   i915_gem_object_set_cache_coherency(out, I915_CACHING_CACHED);
+
+   vma = i915_vma_instance(out, vm, NULL);
+   if (IS_ERR(vma)) {
+   err = PTR_ERR(vma);
+   goto out_batch;
+   }
+
+   err = i915_vma_pin(vma, 0, 0,
+  PIN_USER |
+  PIN_OFFSET_FIXED |
+  (vm->total - PAGE_SIZE));
+   if (err)
+   goto out_out;
+   GEM_BUG_ON(vma->node.start != vm->total - PAGE_SIZE);
+
+   result = i915_gem_object_pin_map(out, I915_MAP_WB);
+   if (IS_ERR(result)) {
+   err = PTR_ERR(result);
+   goto out_out;
+   }
+
+   for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
+   IGT_TIMEOUT(end_time);
+   unsigned long pass = 0;
+
+