From: Tvrtko Ursulin <tvrtko.ursu...@intel.com>

Support the i915 virtual engine in gem_wsim (new "-b i915" balancer) and in media-bench.pl.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursu...@intel.com>
---
 benchmarks/gem_wsim.c  | 289 ++++++++++++++++++++++++++++++++++-------
 scripts/media-bench.pl |   9 +-
 2 files changed, 251 insertions(+), 47 deletions(-)

diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c
index e0709487897b..e1c73855150b 100644
--- a/benchmarks/gem_wsim.c
+++ b/benchmarks/gem_wsim.c
@@ -59,6 +59,20 @@
 #define LOCAL_I915_EXEC_FENCE_IN              (1<<16)
 #define LOCAL_I915_EXEC_FENCE_OUT             (1<<17)
 
+struct local_drm_i915_gem_context_create_v2 {
+       /* output: id of new context */
+       __u32 ctx_id;
+       __u32 flags;
+#define LOCAL_I915_GEM_CONTEXT_SHARE_GTT       0x1
+#define LOCAL_I915_GEM_CONTEXT_SINGLE_TIMELINE 0x2
+       __u32 share_ctx;
+       __u32 pad;
+};
+
+#define LOCAL_DRM_IOCTL_I915_GEM_CONTEXT_CREATE        DRM_IOWR 
(DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct 
local_drm_i915_gem_context_create_v2)
+
+#define LOCAL_I915_CONTEXT_PARAM_ENGINES       0x7
+
 enum intel_engine_id {
        RCS,
        BCS,
@@ -143,6 +157,14 @@ struct w_step
 
 DECLARE_EWMA(uint64_t, rt, 4, 2)
 
+struct ctx {
+       uint32_t id;
+       int priority;
+       bool targets_instance;
+       bool wants_balance;
+       unsigned int static_vcs;
+};
+
 struct workload
 {
        unsigned int id;
@@ -164,11 +186,7 @@ struct workload
        struct timespec repeat_start;
 
        unsigned int nr_ctxs;
-       struct {
-               uint32_t id;
-               int priority;
-               unsigned int static_vcs;
-       } *ctx_list;
+       struct ctx *ctx_list;
 
        int sync_timeline;
        uint32_t sync_seqno;
@@ -225,6 +243,7 @@ static int fd;
 #define HEARTBEAT      (1<<7)
 #define GLOBAL_BALANCE (1<<8)
 #define DEPSYNC                (1<<9)
+#define I915           (1<<10)
 
 #define SEQNO_IDX(engine) ((engine) * 16)
 #define SEQNO_OFFSET(engine) (SEQNO_IDX(engine) * sizeof(uint32_t))
@@ -836,7 +855,11 @@ eb_set_engine(struct drm_i915_gem_execbuffer2 *eb,
        if (engine == VCS2 && (flags & VCS2REMAP))
                engine = BCS;
 
-       eb->flags = eb_engine_map[engine];
+       if ((flags & I915) && engine == VCS) {
+               eb->flags = 0;
+       } else {
+               eb->flags = eb_engine_map[engine];
+       }
 }
 
 static void
@@ -862,6 +885,23 @@ get_status_objects(struct workload *wrk)
                return wrk->status_object;
 }
 
+static struct ctx *
+__get_ctx(struct workload *wrk, struct w_step *w)
+{
+       return &wrk->ctx_list[w->context * 2];
+}
+
+static uint32_t
+get_ctxid(struct workload *wrk, struct w_step *w)
+{
+       struct ctx *ctx = __get_ctx(wrk, w);
+
+       if (ctx->targets_instance && ctx->wants_balance && w->engine == VCS)
+               return wrk->ctx_list[w->context * 2 + 1].id;
+       else
+               return wrk->ctx_list[w->context * 2].id;
+}
+
 static void
 alloc_step_batch(struct workload *wrk, struct w_step *w, unsigned int flags)
 {
@@ -914,7 +954,7 @@ alloc_step_batch(struct workload *wrk, struct w_step *w, unsigned int flags)
 
        w->eb.buffers_ptr = to_user_pointer(w->obj);
        w->eb.buffer_count = j + 1;
-       w->eb.rsvd1 = wrk->ctx_list[w->context].id;
+       w->eb.rsvd1 = get_ctxid(wrk, w);
 
        if (flags & SWAPVCS && engine == VCS1)
                engine = VCS2;
@@ -927,17 +967,29 @@ alloc_step_batch(struct workload *wrk, struct w_step *w, unsigned int flags)
                printf("%x|", w->obj[i].handle);
        printf(" %10lu flags=%llx bb=%x[%u] ctx[%u]=%u\n",
                w->bb_sz, w->eb.flags, w->bb_handle, j, w->context,
-               wrk->ctx_list[w->context].id);
+               get_ctxid(wrk, w));
 #endif
 }
 
+static void __ctx_set_prio(uint32_t ctx_id, unsigned int prio)
+{
+       struct drm_i915_gem_context_param param = {
+               .ctx_id = ctx_id,
+               .param = I915_CONTEXT_PARAM_PRIORITY,
+               .value = prio,
+       };
+
+       if (prio)
+               gem_context_set_param(fd, &param);
+}
+
 static void
 prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
 {
        unsigned int ctx_vcs = 0;
        int max_ctx = -1;
        struct w_step *w;
-       int i;
+       int i, j;
 
        wrk->id = id;
        wrk->prng = rand();
@@ -968,44 +1020,174 @@ prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
                }
        }
 
+       /*
+        * Pre-scan workload steps to allocate context list storage.
+        */
        for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) {
-               if ((int)w->context > max_ctx) {
-                       int delta = w->context + 1 - wrk->nr_ctxs;
+               int ctx = w->context * 2 + 1; /* Odd slots are special. */
+               int delta;
+
+               if (ctx <= max_ctx)
+                       continue;
+
+               delta = ctx + 1 - wrk->nr_ctxs;
+
+               wrk->nr_ctxs += delta;
+               wrk->ctx_list = realloc(wrk->ctx_list,
+                                       wrk->nr_ctxs * sizeof(*wrk->ctx_list));
+               memset(&wrk->ctx_list[wrk->nr_ctxs - delta], 0,
+                       delta * sizeof(*wrk->ctx_list));
+
+               max_ctx = ctx;
+       }
+
+       /*
+        * Identify if contexts target specific engine instances and if they
+        * want to be balanced.
+        */
+       for (j = 0; j < wrk->nr_ctxs; j += 2) {
+               bool targets = false;
+               bool balance = false;
+
+               for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) {
+                       if (w->type != BATCH)
+                               continue;
+
+                       if (w->context != (j / 2))
+                               continue;
 
-                       wrk->nr_ctxs += delta;
-                       wrk->ctx_list = realloc(wrk->ctx_list,
-                                               wrk->nr_ctxs *
-                                               sizeof(*wrk->ctx_list));
-                       memset(&wrk->ctx_list[wrk->nr_ctxs - delta], 0,
-                              delta * sizeof(*wrk->ctx_list));
+                       if (w->engine == VCS)
+                               balance = true;
+                       else
+                               targets = true;
+               }
 
-                       max_ctx = w->context;
+               if (flags & I915) {
+                       wrk->ctx_list[j].targets_instance = targets;
+                       wrk->ctx_list[j].wants_balance = balance;
                }
+       }
 
-               if (!wrk->ctx_list[w->context].id) {
-                       struct drm_i915_gem_context_create arg = {};
+       /*
+        * Create and configure contexts.
+        */
+       for (i = 0; i < wrk->nr_ctxs; i += 2) {
+               struct ctx *ctx = &wrk->ctx_list[i];
+               uint32_t ctx_id, share_ctx = 0;
 
-                       drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &arg);
-                       igt_assert(arg.ctx_id);
+               if (ctx->id)
+                       continue;
 
-                       wrk->ctx_list[w->context].id = arg.ctx_id;
+               if (flags & I915) {
+                       struct local_drm_i915_gem_context_create_v2 args = { };
 
-                       if (flags & GLOBAL_BALANCE) {
-                               wrk->ctx_list[w->context].static_vcs = 
context_vcs_rr;
-                               context_vcs_rr ^= 1;
-                       } else {
-                               wrk->ctx_list[w->context].static_vcs = ctx_vcs;
-                               ctx_vcs ^= 1;
-                       }
+                       /* Find existing context to share ppgtt with. */
+                       for (j = 0; j < wrk->nr_ctxs; j++) {
+                               if (!wrk->ctx_list[j].id)
+                                       continue;
 
-                       if (wrk->prio) {
-                               struct drm_i915_gem_context_param param = {
-                                       .ctx_id = arg.ctx_id,
-                                       .param = I915_CONTEXT_PARAM_PRIORITY,
-                                       .value = wrk->prio,
-                               };
-                               gem_context_set_param(fd, &param);
+                               args.flags |= LOCAL_I915_GEM_CONTEXT_SHARE_GTT;
+                               args.share_ctx = share_ctx =
+                                       wrk->ctx_list[j].id;
+                               break;
                        }
+
+                       if (!ctx->targets_instance)
+                               args.flags |= 
LOCAL_I915_GEM_CONTEXT_SINGLE_TIMELINE;
+
+                       drmIoctl(fd, LOCAL_DRM_IOCTL_I915_GEM_CONTEXT_CREATE,
+                                &args);
+
+                       ctx_id = args.ctx_id;
+               } else {
+                       struct drm_i915_gem_context_create args = {};
+
+                       drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &args);
+                       ctx_id = args.ctx_id;
+               }
+
+               igt_assert(ctx_id);
+               ctx->id = ctx_id;
+
+               if (flags & GLOBAL_BALANCE) {
+                       ctx->static_vcs = context_vcs_rr;
+                       context_vcs_rr ^= 1;
+               } else {
+                       ctx->static_vcs = ctx_vcs;
+                       ctx_vcs ^= 1;
+               }
+
+               __ctx_set_prio(ctx_id, wrk->prio);
+
+               /*
+                * Do we need a separate context to satisfy workloads which both
+                * want to target specific engines and be balanced by i915?
+                */
+               if ((flags & I915) && ctx->wants_balance &&
+                   ctx->targets_instance) {
+                       struct local_drm_i915_gem_context_create_v2 args = {};
+
+                       igt_assert(share_ctx);
+
+                       args.flags = LOCAL_I915_GEM_CONTEXT_SINGLE_TIMELINE |
+                                    LOCAL_I915_GEM_CONTEXT_SHARE_GTT;
+                       args.share_ctx = share_ctx;
+
+                       drmIoctl(fd, LOCAL_DRM_IOCTL_I915_GEM_CONTEXT_CREATE,
+                                &args);
+
+                       igt_assert(args.ctx_id);
+                       ctx_id = args.ctx_id;
+                       wrk->ctx_list[i + 1].id = args.ctx_id;
+
+                       __ctx_set_prio(ctx_id, wrk->prio);
+               }
+
+               if (ctx->wants_balance) {
+                       #define LOCAL_I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0
+
+                       struct local_i915_user_extension {
+                               __u64 next_extension;
+                               __u64 name;
+                       };
+
+                       struct local_i915_context_engines_load_balance {
+                               struct local_i915_user_extension base;
+
+                               __u64 flags; /* all undefined flags must be 
zero */
+                               __u64 engines_mask;
+
+                               __u64 mbz[4]; /* reserved for future use; must 
be zero */
+                       } load_balance = {
+                               .base.name = 
LOCAL_I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE,
+                               .engines_mask = -1,
+                       };
+
+                       struct local_i915_context_param_engines {
+                               __u64 extensions;
+
+                               struct {
+                                       __u16 class; /* see enum 
drm_i915_gem_engine_class */
+                                       __u16 instance;
+                               } engines[2];
+                       } __attribute__((packed)) set_engines = {
+                               .extensions = to_user_pointer(&load_balance),
+                               .engines = {
+                                       { .class = I915_ENGINE_CLASS_VIDEO,
+                                         .instance = 0 },
+                                       { .class = I915_ENGINE_CLASS_VIDEO,
+                                         .instance = 1 },
+                               },
+                       };
+
+                       struct drm_i915_gem_context_param param = {
+                               .ctx_id = ctx_id,
+                               .param = LOCAL_I915_CONTEXT_PARAM_ENGINES,
+                               .size = sizeof(set_engines),
+                               .value = to_user_pointer(&set_engines),
+                       };
+
+                       gem_context_set_param(fd, &param);
                }
        }
 
@@ -1380,7 +1562,7 @@ static enum intel_engine_id
 context_balance(const struct workload_balancer *balancer,
                struct workload *wrk, struct w_step *w)
 {
-       return get_vcs_engine(wrk->ctx_list[w->context].static_vcs);
+       return get_vcs_engine(__get_ctx(wrk, w)->static_vcs);
 }
 
 static unsigned int
@@ -1574,6 +1756,12 @@ static const struct workload_balancer all_balancers[] = {
                .get_qd = get_engine_busy,
                .balance = busy_avg_balance,
        },
+       {
+               .id = 11,
+               .name = "i915",
+               .desc = "i915 balancing.",
+               .flags = I915,
+       },
 };
 
 static unsigned int
@@ -1952,7 +2140,8 @@ static void *run_workload(void *data)
                        last_sync = false;
 
                        wrk->nr_bb[engine]++;
-                       if (engine == VCS && wrk->balancer) {
+                       if (engine == VCS && wrk->balancer &&
+                           wrk->balancer->balance) {
                                engine = wrk->balancer->balance(wrk->balancer,
                                                                wrk, w);
                                wrk->nr_bb[engine]++;
@@ -2379,6 +2568,12 @@ int main(int argc, char **argv)
                return 1;
        }
 
+       if ((flags & VCS2REMAP) && (flags & I915)) {
+               if (verbose)
+                       fprintf(stderr, "VCS remapping not supported with i915 
balancing!\n");
+               return 1;
+       }
+
        if (!nop_calibration) {
                if (verbose > 1)
                        printf("Calibrating nop delay with %u%% tolerance...\n",
@@ -2464,11 +2659,17 @@ int main(int argc, char **argv)
                printf("%u client%s.\n", clients, clients > 1 ? "s" : "");
                if (flags & SWAPVCS)
                        printf("Swapping VCS rings between clients.\n");
-               if (flags & GLOBAL_BALANCE)
-                       printf("Using %s balancer in global mode.\n",
-                              balancer->name);
-               else if (balancer)
+               if (flags & GLOBAL_BALANCE) {
+                       if (flags & I915) {
+                               printf("Ignoring global balancing with 
i915!\n");
+                               flags &= ~GLOBAL_BALANCE;
+                       } else {
+                               printf("Using %s balancer in global mode.\n",
+                                      balancer->name);
+                       }
+               } else if (balancer) {
                        printf("Using %s balancer.\n", balancer->name);
+               }
        }
 
        if (master_workload >= 0 && clients == 1)
@@ -2485,7 +2686,7 @@ int main(int argc, char **argv)
                if (flags & SWAPVCS && i & 1)
                        flags_ &= ~SWAPVCS;
 
-               if (flags & GLOBAL_BALANCE) {
+               if ((flags & GLOBAL_BALANCE) && !(flags & I915)) {
                        w[i]->balancer = &global_balancer;
                        w[i]->global_wrk = w[0];
                        w[i]->global_balancer = balancer;
diff --git a/scripts/media-bench.pl b/scripts/media-bench.pl
index 066b542f95df..ddf9c0ec05c8 100755
--- a/scripts/media-bench.pl
+++ b/scripts/media-bench.pl
@@ -49,10 +49,11 @@ my $nop;
 my %opts;
 
 my @balancers = ( 'rr', 'rand', 'qd', 'qdr', 'qdavg', 'rt', 'rtr', 'rtavg',
-                 'context', 'busy', 'busy-avg' );
+                 'context', 'busy', 'busy-avg', 'i915' );
 my %bal_skip_H = ( 'rr' => 1, 'rand' => 1, 'context' => 1, , 'busy' => 1,
-                  'busy-avg' => 1 );
-my %bal_skip_R = ( 'context' => 1 );
+                  'busy-avg' => 1, 'i915' => 1 );
+my %bal_skip_R = ( 'context' => 1, 'i915' => 1 );
+my %bal_skip_G = ( 'i915' => 1 );
 
 my @workloads = (
        'media_load_balance_17i7.wsim',
@@ -498,6 +499,8 @@ foreach my $wrk (@saturation_workloads) {
                                my $bid;
 
                                if ($bal ne '') {
+                                       next GBAL if $G =~ '-G' and exists 
$bal_skip_G{$bal};
+
                                        push @xargs, "-b $bal";
                                        push @xargs, '-R' unless exists 
$bal_skip_R{$bal};
                                        push @xargs, $G if $G ne '';
-- 
2.17.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to