On Thu, Oct 14, 2021 at 11:42:41AM -0700, John Harrison wrote:
> On 10/14/2021 10:20, Matthew Brost wrote:
> > A weak implementation of parallel submission (multi-bb execbuf IOCTL) for
> > execlists. Doing as little as possible to support this interface for
> > execlists - basically just passing submit fences between each request
> > generated and virtual engines are not allowed. This is on par with what
> > is there for the existing (hopefully soon deprecated) bonding interface.
> > 
> > We perma-pin these execlists contexts to align with GuC implementation.
> > 
> > Signed-off-by: Matthew Brost <matthew.br...@intel.com>
> > ---
> >   drivers/gpu/drm/i915/gem/i915_gem_context.c   | 10 ++--
> >   drivers/gpu/drm/i915/gt/intel_context.c       |  4 +-
> >   .../drm/i915/gt/intel_execlists_submission.c  | 56 ++++++++++++++++++-
> >   drivers/gpu/drm/i915/gt/intel_lrc.c           |  2 +
> >   .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  2 -
> >   5 files changed, 64 insertions(+), 10 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
> > b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > index fb33d0322960..35e87a7d0ea9 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > @@ -570,10 +570,6 @@ set_proto_ctx_engines_parallel_submit(struct 
> > i915_user_extension __user *base,
> >     struct intel_engine_cs **siblings = NULL;
> >     intel_engine_mask_t prev_mask;
> > -   /* FIXME: This is NIY for execlists */
> > -   if (!(intel_uc_uses_guc_submission(&i915->gt.uc)))
> > -           return -ENODEV;
> > -
> >     if (get_user(slot, &ext->engine_index))
> >             return -EFAULT;
> > @@ -583,6 +579,12 @@ set_proto_ctx_engines_parallel_submit(struct 
> > i915_user_extension __user *base,
> >     if (get_user(num_siblings, &ext->num_siblings))
> >             return -EFAULT;
> > +   if (!intel_uc_uses_guc_submission(&i915->gt.uc) && num_siblings != 1) {
> > +           drm_dbg(&i915->drm, "Only 1 sibling (%d) supported in non-GuC 
> > mode\n",
> > +                   num_siblings);
> > +           return -EINVAL;
> > +   }
> > +
> >     if (slot >= set->num_engines) {
> >             drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n",
> >                     slot, set->num_engines);
> > diff --git a/drivers/gpu/drm/i915/gt/intel_context.c 
> > b/drivers/gpu/drm/i915/gt/intel_context.c
> > index 5634d14052bc..1bec92e1d8e6 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_context.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_context.c
> > @@ -79,7 +79,8 @@ static int intel_context_active_acquire(struct 
> > intel_context *ce)
> >     __i915_active_acquire(&ce->active);
> > -   if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine))
> > +   if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine) ||
> > +       intel_context_is_parallel(ce))
> >             return 0;
> >     /* Preallocate tracking nodes */
> > @@ -563,7 +564,6 @@ void intel_context_bind_parent_child(struct 
> > intel_context *parent,
> >      * Callers responsibility to validate that this function is used
> >      * correctly but we use GEM_BUG_ON here ensure that they do.
> >      */
> > -   GEM_BUG_ON(!intel_engine_uses_guc(parent->engine));
> >     GEM_BUG_ON(intel_context_is_pinned(parent));
> >     GEM_BUG_ON(intel_context_is_child(parent));
> >     GEM_BUG_ON(intel_context_is_pinned(child));
> > diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c 
> > b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> > index bedb80057046..8cd986bdf26c 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> > @@ -927,8 +927,7 @@ static void execlists_submit_ports(struct 
> > intel_engine_cs *engine)
> >   static bool ctx_single_port_submission(const struct intel_context *ce)
> >   {
> > -   return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
> > -           intel_context_force_single_submission(ce));
> > +   return intel_context_force_single_submission(ce);
> Does this change not affect all execlist operation rather than just parallel
> submission?
> 

I don't think so. The only place that sets single submission was in the
GVT code. I think was an optimization so this would just compile out if
GVT wasn't built.

> >   }
> >   static bool can_merge_ctx(const struct intel_context *prev,
> > @@ -2598,6 +2597,58 @@ static void execlists_context_cancel_request(struct 
> > intel_context *ce,
> >                                   current->comm);
> >   }
> > +static struct intel_context *
> > +execlists_create_parallel(struct intel_engine_cs **engines,
> > +                     unsigned int num_siblings,
> > +                     unsigned int width)
> > +{
> > +   struct intel_engine_cs **siblings = NULL;
> > +   struct intel_context *parent = NULL, *ce, *err;
> > +   int i, j;
> > +
> > +   GEM_BUG_ON(num_siblings != 1);
> > +
> > +   siblings = kmalloc_array(num_siblings,
> > +                            sizeof(*siblings),
> > +                            GFP_KERNEL);
> > +   if (!siblings)
> > +           return ERR_PTR(-ENOMEM);
> > +
> > +   for (i = 0; i < width; ++i) {
> > +           for (j = 0; j < num_siblings; ++j)
> > +                   siblings[j] = engines[i * num_siblings + j];
> What is the purpose of this array? The only usage that I can see is
> siblings[0] on the line below. The rest of the entries never seem to be
> used. So you could just replace that with 'engines[i * num_siblings]' and
> drop the siblings array itself completely?
> 

Yes, this can be dropped as num_siblings must be 1. I think this was
copied and pasted from the GuC code where num_siblings could be more
than 1. Will fixup in a standalone patch after we merge the GuC parallel
submission.

Matt

> John.
> 
> 
> > +
> > +           ce = intel_context_create(siblings[0]);
> > +           if (!ce) {
> > +                   err = ERR_PTR(-ENOMEM);
> > +                   goto unwind;
> > +           }
> > +
> > +           if (i == 0)
> > +                   parent = ce;
> > +           else
> > +                   intel_context_bind_parent_child(parent, ce);
> > +   }
> > +
> > +   parent->parallel.fence_context = dma_fence_context_alloc(1);
> > +
> > +   intel_context_set_nopreempt(parent);
> > +   intel_context_set_single_submission(parent);
> > +   for_each_child(parent, ce) {
> > +           intel_context_set_nopreempt(ce);
> > +           intel_context_set_single_submission(ce);
> > +   }
> > +
> > +   kfree(siblings);
> > +   return parent;
> > +
> > +unwind:
> > +   if (parent)
> > +           intel_context_put(parent);
> > +   kfree(siblings);
> > +   return err;
> > +}
> > +
> >   static const struct intel_context_ops execlists_context_ops = {
> >     .flags = COPS_HAS_INFLIGHT,
> > @@ -2616,6 +2667,7 @@ static const struct intel_context_ops 
> > execlists_context_ops = {
> >     .reset = lrc_reset,
> >     .destroy = lrc_destroy,
> > +   .create_parallel = execlists_create_parallel,
> >     .create_virtual = execlists_create_virtual,
> >   };
> > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
> > b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > index 56156cf18c41..70f4b309522d 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > @@ -1065,6 +1065,8 @@ lrc_pin(struct intel_context *ce,
> >   void lrc_unpin(struct intel_context *ce)
> >   {
> > +   if (unlikely(ce->parallel.last_rq))
> > +           i915_request_put(ce->parallel.last_rq);
> >     check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET,
> >                   ce->engine);
> >   }
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
> > b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > index d7710debcd47..145ffe265305 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > @@ -2960,8 +2960,6 @@ static void guc_parent_context_unpin(struct 
> > intel_context *ce)
> >     GEM_BUG_ON(!intel_context_is_parent(ce));
> >     GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
> > -   if (ce->parallel.last_rq)
> > -           i915_request_put(ce->parallel.last_rq);
> >     unpin_guc_id(guc, ce);
> >     lrc_unpin(ce);
> >   }
> 

Reply via email to