Quoting Chris Wilson (2018-03-19 15:29:21)
> Quoting Tvrtko Ursulin (2018-03-19 13:56:05)
> > @@ -443,15 +501,12 @@ most_busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
> >                 if (!gem_has_engine(gem_fd, e_->class, e_->instance))
> >                         continue;
> >
> > -               if (e == e_) {
> > +               if (e == e_)
> >                         idle_idx = i;
> > -               } else if (spin) {
> > -                       __submit_spin_batch(gem_fd, &obj, e_);
> > -               } else {
> > -                       spin = igt_spin_batch_new(gem_fd, 0,
> > -                                                 e2ring(gem_fd, e_), 0);
> > -                       obj.handle = spin->handle;
> > -               }
> > +               else if (spin)
> > +                       __submit_spin_batch(gem_fd, spin, e_);
> > +               else
> > +                       spin = __spin_poll(gem_fd, 0, e2ring(gem_fd, e_));
>
> So this is what is killing snb. We resubmit the spin-batch, with its
> MI_STORE_DWORD_IMM intact, onto each ring. Instant machine death for snb
> when we reach vcs.
>
> If we tweak the spinner to jump to a location 64 bytes past the start, we
> can opt out of the MI_STORE_DW when not required. Let me go and cook up a
> delta.
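To make the intent of the delta below clearer, here is a rough sketch of the
resulting spinner layout (illustrative only, not the actual IGT source; the
offsets come from the patch, and the fragment at the end just mirrors what
the perf_pmu hunks below do):

/*
 * Spinner bo as laid out by the delta below (sketch):
 *
 *   +0   MI_STORE_DWORD_IMM   "I have started" write for the poll;
 *                             not safe to replay on every ring on
 *                             older gens (e.g. snb vcs)
 *   +64  spin loop            nops ... MI_BATCH_BUFFER_START looping
 *                             back to +64 (r->delta = 64)
 *
 * The first submission uses batch_start_offset = 0, so the poll write
 * runs once before we start spinning. Resubmissions onto the other
 * rings use batch_start_offset = 64 and enter the loop directly,
 * never executing the MI_STORE_DWORD_IMM.
 */

/* Roughly what the perf_pmu hunks below end up doing per engine e_: */
if (spin)
        __submit_spin_batch(gem_fd, spin, e_, 64); /* skip the poll write */
else
        spin = __spin_poll(gem_fd, 0, e2ring(gem_fd, e_));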
diff --git a/lib/igt_dummyload.c b/lib/igt_dummyload.c
index b7a89fd..2a3c3b5 100644
--- a/lib/igt_dummyload.c
+++ b/lib/igt_dummyload.c
@@ -84,7 +84,7 @@ emit_recursive_batch(igt_spin_t *spin, int fd, uint32_t ctx, unsigned engine,
 #define SCRATCH 0
 #define BATCH 1
         const int gen = intel_gen(intel_get_drm_devid(fd));
-        struct drm_i915_gem_relocation_entry relocs[2];
+        struct drm_i915_gem_relocation_entry relocs[2], *r;
         struct drm_i915_gem_execbuffer2 *execbuf;
         struct drm_i915_gem_exec_object2 *obj;
         unsigned int engines[16];
@@ -182,7 +182,7 @@ emit_recursive_batch(igt_spin_t *spin, int fd, uint32_t ctx, unsigned engine,
                 execbuf->buffer_count++;
         }

-        spin->batch = batch;
+        spin->batch = batch_start + 64/sizeof(*batch);
         spin->handle = obj[BATCH].handle;

         /* Allow ourselves to be preempted */
@@ -202,26 +202,25 @@ emit_recursive_batch(igt_spin_t *spin, int fd, uint32_t ctx, unsigned engine,
         batch += 1000;

         /* recurse */
-        fill_reloc(&relocs[obj[BATCH].relocation_count],
-                   obj[BATCH].handle, (batch - batch_start) + 1,
-                   I915_GEM_DOMAIN_COMMAND, 0);
+        r = &relocs[obj[BATCH].relocation_count++];
+        r->target_handle = obj[BATCH].handle;
+        r->offset = (batch + 1 - batch_start) * sizeof(*batch);
+        r->read_domains = I915_GEM_DOMAIN_COMMAND;
+        r->delta = 64;
         if (gen >= 8) {
                 *batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
-                *batch++ = 0;
+                *batch++ = r->delta;
                 *batch++ = 0;
         } else if (gen >= 6) {
                 *batch++ = MI_BATCH_BUFFER_START | 1 << 8;
-                *batch++ = 0;
+                *batch++ = r->delta;
         } else {
                 *batch++ = MI_BATCH_BUFFER_START | 2 << 6;
-                *batch = 0;
-                if (gen < 4) {
-                        *batch |= 1;
-                        relocs[obj[BATCH].relocation_count].delta = 1;
-                }
+                if (gen < 4)
+                        r->delta |= 1;
+                *batch = r->delta;
                 batch++;
         }
-        obj[BATCH].relocation_count++;
         obj[BATCH].relocs_ptr = to_user_pointer(relocs);

         execbuf->buffers_ptr = to_user_pointer(obj +
diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
index 87875fb..469b9be 100644
--- a/tests/perf_pmu.c
+++ b/tests/perf_pmu.c
@@ -474,12 +474,14 @@ busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,

 static void
 __submit_spin_batch(int gem_fd, igt_spin_t *spin,
-                    const struct intel_execution_engine2 *e)
+                    const struct intel_execution_engine2 *e,
+                    int offset)
 {
         struct drm_i915_gem_execbuffer2 eb = spin->execbuf;

         eb.flags &= ~(0x3f | I915_EXEC_BSD_MASK);
         eb.flags |= e2ring(gem_fd, e) | I915_EXEC_NO_RELOC;
+        eb.batch_start_offset += offset;

         gem_execbuf(gem_fd, &eb);
 }
@@ -504,7 +506,7 @@ most_busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
                 if (e == e_)
                         idle_idx = i;
                 else if (spin)
-                        __submit_spin_batch(gem_fd, spin, e_);
+                        __submit_spin_batch(gem_fd, spin, e_, 64);
                 else
                         spin = __spin_poll(gem_fd, 0, e2ring(gem_fd, e_));

@@ -561,7 +563,7 @@ all_busy_check_all(int gem_fd, const unsigned int num_engines,
                         continue;

                 if (spin)
-                        __submit_spin_batch(gem_fd, spin, e, 64);
+                        __submit_spin_batch(gem_fd, spin, e, 64);
                 else
                         spin = __spin_poll(gem_fd, 0, e2ring(gem_fd, e));

@@ -1613,7 +1615,7 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
                 /* Restart the spinbatch. */
                 igt_nsec_elapsed(&start);
                 __rearm_spin_batch(spin);
-                __submit_spin_batch(gem_fd, spin, e);
+                __submit_spin_batch(gem_fd, spin, e, 0);

                 /* Wait for batch to start executing. */
                 __spin_wait(gem_fd, spin);