Measure the baseline latency between contexts in order to directly
compare that with the additional cost of preemption.

Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
---
 tests/i915/gem_exec_latency.c | 230 ++++++++++++++++++++++++++++++++++
 1 file changed, 230 insertions(+)

diff --git a/tests/i915/gem_exec_latency.c b/tests/i915/gem_exec_latency.c
index e56d62780..e88fbbc6a 100644
--- a/tests/i915/gem_exec_latency.c
+++ b/tests/i915/gem_exec_latency.c
@@ -410,6 +410,86 @@ static void latency_from_ring(int fd,
        }
 }
 
+/*
+ * Measure how quickly a chain of trivial batches is picked up and executed.
+ *
+ * A single 4096-byte buffer holds sixteen batches, one every 64 bytes. Batch
+ * i stores the timestamp register (0x2000 + 0x358) into dword i of the last
+ * 64 bytes of that same buffer and then ends. For each chain length of 2, 4,
+ * 8 and 16, the first "length" batches are submitted back to back, over and
+ * over until igt_until_timeout(2) expires, and three means are reported:
+ *
+ *  - submission: from an mmio read of the timestamp taken just before the
+ *    first execbuf to the timestamp stored by the first batch;
+ *  - inter-batch: from the first to the last stored timestamp, divided by
+ *    the number of batch-to-batch steps (length - 1);
+ *  - end-to-end: from the initial mmio read to an mmio read taken once the
+ *    last batch has stored its timestamp.
+ *
+ * @i915: device fd passed to the gem_*() helpers
+ * @ring: engine selector, OR'ed into the execbuf flags
+ * @name: engine name, used only to prefix the report lines
+ *
+ * Note that the timestamp register is always the one at mmio base 0x2000,
+ * whichever engine @ring selects.
+ */
+static void execution_latency(int i915, unsigned int ring, const char *name)
+{
+       /*
+        * One object, created once here and closed at the end. It is pinned
+        * with .offset left at 0 so that the absolute store addresses written
+        * into the batches below land inside this very buffer.
+        */
+       struct drm_i915_gem_exec_object2 obj = {
+               .handle = gem_create(i915, 4096),
+               .flags = EXEC_OBJECT_PINNED,
+       };
+       struct drm_i915_gem_execbuffer2 execbuf = {
+               .buffers_ptr = to_user_pointer(&obj),
+               .buffer_count = 1,
+               .flags = ring | LOCAL_I915_EXEC_NO_RELOC | LOCAL_I915_EXEC_HANDLE_LUT,
+       };
+       const unsigned int mmio_base = 0x2000;
+       const unsigned int cs_timestamp = mmio_base + 0x358;
+       volatile uint32_t *timestamp;
+       uint32_t *cs, *result;
+
+       /* CPU-side view of the same register the batches store. */
+       timestamp =
+               (volatile uint32_t *)((volatile char *)igt_global_mmio + cs_timestamp);
+
+       result = gem_mmap__wc(i915, obj.handle, 0, 4096, PROT_WRITE);
+
+       /* Batch i lives at dword 16 * i and writes slot i of the tail. */
+       for (int i = 0; i < 16; i++) {
+               cs = result + 16 * i;
+               *cs++ = 0x24 << 23 | 2; /* SRM */
+               *cs++ = cs_timestamp;
+               *cs++ = 4096 - 16 * 4 + i * 4;
+               *cs++ = 0;
+               *cs++ = 0xa << 23; /* MI_BATCH_BUFFER_END */
+       }
+
+       /* From here on cs[] indexes the sixteen timestamp slots. */
+       cs = result + 1024 - 16;
+
+       for (int length = 2; length <= 16; length <<= 1) {
+               struct igt_mean submit, batch, total;
+               int last = length - 1;
+
+               igt_mean_init(&submit);
+               igt_mean_init(&batch);
+               igt_mean_init(&total);
+
+               igt_until_timeout(2) {
+                       uint32_t now, end;
+
+                       /*
+                        * Clear the slot polled below; a value left over from
+                        * the previous pass would end the wait immediately.
+                        */
+                       cs[last] = 0;
+
+                       now = *timestamp;
+                       for (int i = 0; i < length; i++) {
+                               execbuf.batch_start_offset = 64 * i;
+                               gem_execbuf(i915, &execbuf);
+                       }
+                       /* Spin until the last batch of the chain has stored. */
+                       while (!((volatile uint32_t *)cs)[last])
+                               ;
+                       end = *timestamp;
+
+                       /*
+                        * Tick deltas are scaled by rcs_clock; the report
+                        * prints 1e-3 * mean as "us", so the samples are
+                        * presumably in nanoseconds.
+                        */
+                       igt_mean_add(&submit, (cs[0] - now) * rcs_clock);
+                       igt_mean_add(&batch, (cs[last] - cs[0]) * rcs_clock / last);
+                       igt_mean_add(&total, (end - now) * rcs_clock);
+               }
+
+               igt_info("%sx%d Submission latency: %.2f±%.2fus\n",
+                        name, length,
+                        1e-3 * igt_mean_get(&submit),
+                        1e-3 * sqrt(igt_mean_get_variance(&submit)));
+
+               igt_info("%sx%d Inter-batch latency: %.2f±%.2fus\n",
+                        name, length,
+                        1e-3 * igt_mean_get(&batch),
+                        1e-3 * sqrt(igt_mean_get_variance(&batch)));
+
+               igt_info("%sx%d End-to-end latency: %.2f±%.2fus\n",
+                        name, length,
+                        1e-3 * igt_mean_get(&total),
+                        1e-3 * sqrt(igt_mean_get_variance(&total)));
+       }
+
+       munmap(result, 4096);
+       gem_close(i915, obj.handle);
+}
+
 static void
 __submit_spin(int fd, igt_spin_t *spin, unsigned int flags)
 {
@@ -616,6 +696,142 @@ rthog_latency_on_ring(int fd, unsigned int engine, const char *name, unsigned in
        munmap(results, MMAP_SZ);
 }
 
+/*
+ * Measure how long the engine takes to move from a batch spinning in one
+ * context to a batch queued from a second context.
+ *
+ * Each sample is the timestamp stored by the second context's batch minus
+ * the last timestamp stored by the spinner, scaled by rcs_clock. Samples
+ * are gathered until igt_until_timeout(5) expires, then the mean and the
+ * standard deviation are printed with igt_info().
+ *
+ * @i915: device fd passed to the gem_*() helpers
+ * @engine: execbuf engine selector; also selects the mmio base used for
+ *          the TIMESTAMP and GPR0 registers (skips if it is not listed)
+ * @name: engine name, used only in the report line
+ * @flags: with PREEMPT set, ctx[0] gets priority -1023 and ctx[1] +1023
+ */
+static void context_switch(int i915,
+                          unsigned int engine, const char *name,
+                          unsigned int flags)
+{
+       struct drm_i915_gem_exec_object2 obj[2];
+       struct drm_i915_gem_relocation_entry reloc[5];
+       struct drm_i915_gem_execbuffer2 eb;
+       uint32_t *cs, *bbe, *results, v;
+       unsigned int mmio_base;
+       struct igt_mean mean;
+       uint32_t ctx[2];
+
+       /* XXX i915_query()! Until then the mmio bases are hard-coded here. */
+       switch (engine) {
+       case I915_EXEC_DEFAULT:
+       case I915_EXEC_RENDER:
+               mmio_base = 0x2000;
+               break;
+#if 0
+       case I915_EXEC_BSD:
+               mmio_base = 0x12000;
+               break;
+#endif
+       case I915_EXEC_BLT:
+               mmio_base = 0x22000;
+               break;
+
+       case I915_EXEC_VEBOX:
+               if (intel_gen(intel_get_drm_devid(i915)) >= 11)
+                       mmio_base = 0x1d8000;
+               else
+                       mmio_base = 0x1a000;
+               break;
+
+       default:
+               igt_skip("mmio base not known\n");
+       }
+
+       for (int i = 0; i < ARRAY_SIZE(ctx); i++)
+               ctx[i] = gem_context_create(i915);
+
+       /* ctx[0] runs the spinner, ctx[1] the measurement batch. */
+       if (flags & PREEMPT) {
+               gem_context_set_priority(i915, ctx[0], -1023);
+               gem_context_set_priority(i915, ctx[1], +1023);
+       }
+
+       /*
+        * obj[0] is the results page, read by the CPU through a cached mmap:
+        *   results[0] - timestamp stored by the spinner on every pass
+        *   results[1] - timestamp stored by the second context's batch
+        *   results[2] - copy of results[0] taken by the second context
+        */
+       memset(obj, 0, sizeof(obj));
+       obj[0].handle = gem_create(i915, 4096);
+       gem_set_caching(i915, obj[0].handle, 1); /* 1: I915_CACHING_CACHED */
+       results = gem_mmap__cpu(i915, obj[0].handle, 0, 4096, PROT_READ);
+       gem_set_domain(i915, obj[0].handle, I915_GEM_DOMAIN_CPU, 0);
+
+       /* obj[1] holds both batches and carries all five relocations. */
+       obj[1].handle = gem_create(i915, 4096);
+       memset(reloc,0, sizeof(reloc));
+       obj[1].relocation_count = ARRAY_SIZE(reloc);
+       obj[1].relocs_ptr = to_user_pointer(reloc);
+       bbe = gem_mmap__wc(i915, obj[1].handle, 0, 4096, PROT_WRITE);
+       gem_set_domain(i915, obj[1].handle,
+                      I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC);
+
+       /*
+        * First batch (dword 0), submitted in ctx[0]: store TIMESTAMP into
+        * results[0], then jump back to the start of this buffer. It keeps
+        * looping until the CPU rewrites the first dword in the loop below.
+        * reloc[0], reloc[1] and reloc[3] keep the zero delta from memset().
+        */
+       cs = bbe;
+       *cs++ = 0x5 << 23; /* MI_ARB_CHECK */
+       *cs++ = 0x24 << 23 | 2; /* SRM */
+       *cs++ = mmio_base + 0x358; /* TIMESTAMP */
+       reloc[0].target_handle = obj[0].handle;
+       reloc[0].offset = (cs - bbe) * sizeof(*cs);
+       *cs++ = 0;
+       *cs++ = 0;
+       *cs++ = MI_BATCH_BUFFER_START | 1;
+       reloc[1].target_handle = obj[1].handle;
+       reloc[1].offset = (cs - bbe) * sizeof(*cs);
+       *cs++ = 0;
+       *cs++ = 0;
+
+       /*
+        * Second batch (dword 64), submitted in ctx[1]: store TIMESTAMP into
+        * results[1], then copy results[0] into results[2] by way of GPR0,
+        * then end.
+        */
+       cs = bbe + 64;
+       *cs++ = 0x24 << 23 | 2; /* SRM */
+       *cs++ = mmio_base + 0x358; /* TIMESTAMP */
+       reloc[2].target_handle = obj[0].handle;
+       reloc[2].offset = (cs - bbe) * sizeof(*cs);
+       *cs++ = reloc[2].delta = 4;
+       *cs++ = 0;
+       *cs++ = 0x29 << 23 | 2; /* LRM */
+       *cs++ = mmio_base + 0x600; /* GPR0 */
+       reloc[3].target_handle = obj[0].handle;
+       reloc[3].offset = (cs - bbe) * sizeof(*cs);
+       *cs++ = 0;
+       *cs++ = 0;
+       *cs++ = 0x24 << 23 | 2; /* SRM */
+       *cs++ = mmio_base + 0x600; /* GPR0 */
+       reloc[4].target_handle = obj[0].handle;
+       reloc[4].offset = (cs - bbe) * sizeof(*cs);
+       *cs++ = reloc[4].delta = 8;
+       *cs++ = 0;
+       *cs++ = 0xa << 23; /* MI_BATCH_BUFFER_END */
+
+       memset(&eb, 0, sizeof(eb));
+       eb.buffers_ptr = to_user_pointer(obj);
+       eb.buffer_count = ARRAY_SIZE(obj);
+       eb.flags = engine;
+       eb.flags |= LOCAL_I915_EXEC_NO_RELOC;
+
+       v = 0; /* last spinner timestamp seen by the CPU */
+       igt_mean_init(&mean);
+       igt_until_timeout(5) {
+               /* Start the spinner in ctx[0]. */
+               eb.rsvd1 = ctx[0];
+               eb.batch_start_offset = 0;
+               gem_execbuf(i915, &eb);
+
+               /* Wait until it has stored a timestamp different from v. */
+               while (results[0] == v)
+                       igt_assert(gem_bo_busy(i915, obj[1].handle));
+
+               /* Queue the measurement batch (dword 64) from ctx[1]. */
+               eb.rsvd1 = ctx[1];
+               eb.batch_start_offset = 64 * sizeof(*cs);
+               gem_execbuf(i915, &eb);
+
+               /*
+                * Turn the spinner's first dword into a batch end so it stops
+                * on its next pass, wait on obj[1], then restore the dword
+                * for the next iteration.
+                */
+               *bbe = 0xa << 23;
+               gem_sync(i915, obj[1].handle);
+               *bbe = 0x5 << 23;
+
+               v = results[0];
+               /* second context's timestamp minus its copy of the spinner's */
+               igt_mean_add(&mean, (results[1] - results[2]) * rcs_clock);
+       }
+       igt_info("%s context switch latency%s: %.2f±%.2fus\n",
+                name, flags & PREEMPT ? " (preempt)" : "",
+                1e-3 * igt_mean_get(&mean),
+                1e-3 * sqrt(igt_mean_get_variance(&mean)));
+       munmap(results, 4096);
+       munmap(bbe, 4096);
+
+       for (int i = 0; i < ARRAY_SIZE(obj); i++)
+               gem_close(i915, obj[i].handle);
+
+       for (int i = 0; i < ARRAY_SIZE(ctx); i++)
+               gem_context_destroy(i915, ctx[i]);
+}
+
 static double clockrate(int i915, int reg)
 {
        volatile uint32_t *mmio;
@@ -722,6 +938,11 @@ igt_main
                                                              e->name,
                                                              0);
 
+                               igt_subtest_f("%s-execution-latency", e->name)
+                                       execution_latency(device,
+                                                         e->exec_id | e->flags,
+                                                         e->name);
+
                                igt_subtest_f("%s-live-dispatch-queued", e->name)
                                        latency_on_ring(device,
                                                        e->exec_id | e->flags,
@@ -741,12 +962,21 @@ igt_main
                                                          e->exec_id | e->flags,
                                                          e->name, CORK);
 
+                               igt_subtest_f("%s-cs", e->name)
+                                       context_switch(device,
+                                                      e->exec_id | e->flags,
+                                                      e->name, 0);
                                igt_subtest_group {
                                        igt_fixture {
                                                gem_require_contexts(device);
                                                igt_require(gem_scheduler_has_preemption(device));
                                        }
 
+                                       igt_subtest_f("%s-cs-preempt", e->name)
+                                               context_switch(device,
+                                                               e->exec_id | e->flags,
+                                                               e->name, PREEMPT);
+
                                        igt_subtest_f("%s-preemption", e->name)
                                                latency_from_ring(device,
                                                                  e->exec_id | e->flags,
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to