Mika Kuoppala <mika.kuopp...@linux.intel.com> writes:

> To further enhance port processing, keep track of
> reserved ports. This way we can iterate only the used subset
> of port space. Note that we lift the responsibility of
> execlists_submit_request() to inspect hw availability and

s/execlists_submit_request/insert_request/

-Mika

> always do dequeuing. This is to ensure that only the irq
> handler will be responsible for keeping track of available ports.
>
> v2: rebase, comment fix, READ_ONCE only outside of irq handler (Chris)
>
> Cc: Chris Wilson <ch...@chris-wilson.co.uk>
> Cc: Michał Winiarski <michal.winiar...@intel.com>
> Signed-off-by: Mika Kuoppala <mika.kuopp...@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_guc_submission.c | 51 +++++++++--------
>  drivers/gpu/drm/i915/i915_irq.c            |  2 +-
>  drivers/gpu/drm/i915/intel_engine_cs.c     |  7 ++-
>  drivers/gpu/drm/i915/intel_lrc.c           | 90 
> ++++++++++++++++++------------
>  drivers/gpu/drm/i915/intel_ringbuffer.h    | 55 +++++++++++++-----
>  5 files changed, 129 insertions(+), 76 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c 
> b/drivers/gpu/drm/i915/i915_guc_submission.c
> index 25c9bac94c39..359f57a59cba 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -487,7 +487,7 @@ static void guc_ring_doorbell(struct i915_guc_client 
> *client)
>   * @engine: engine associated with the commands
>   *
>   * The only error here arises if the doorbell hardware isn't functioning
> - * as expected, which really shouln't happen.
> + * as expected, which really shouldn't happen.
>   */
>  static void i915_guc_submit(struct intel_engine_cs *engine)
>  {
> @@ -495,17 +495,19 @@ static void i915_guc_submit(struct intel_engine_cs 
> *engine)
>       struct intel_guc *guc = &dev_priv->guc;
>       struct i915_guc_client *client = guc->execbuf_client;
>       struct intel_engine_execlist * const el = &engine->execlist;
> -     struct execlist_port *port = el->port;
>       const unsigned int engine_id = engine->id;
>       unsigned int n;
>  
> -     for (n = 0; n < ARRAY_SIZE(el->port); n++) {
> +     for (n = 0; n < execlist_active_ports(el); n++) {
> +             struct execlist_port *port;
>               struct drm_i915_gem_request *rq;
>               unsigned int count;
>  
> -             rq = port_unpack(&port[n], &count);
> +             port = execlist_port_index(el, n);
> +
> +             rq = port_unpack(port, &count);
>               if (rq && count == 0) {
> -                     port_set(&port[n], port_pack(rq, ++count));
> +                     port_set(port, port_pack(rq, ++count));
>  
>                       if (i915_vma_is_map_and_fenceable(rq->ring->vma))
>                               POSTING_READ_FW(GUC_STATUS);
> @@ -560,25 +562,27 @@ static void port_assign(struct execlist_port *port,
>  static void i915_guc_dequeue(struct intel_engine_cs *engine)
>  {
>       struct intel_engine_execlist * const el = &engine->execlist;
> -     struct execlist_port *port = el->port;
> +     struct execlist_port *port;
>       struct drm_i915_gem_request *last = NULL;
> -     const struct execlist_port * const last_port = execlist_port_tail(el);
>       bool submit = false;
>       struct rb_node *rb;
>  
> -     if (port_isset(port))
> -             port++;
> -
>       spin_lock_irq(&engine->timeline->lock);
>       rb = el->first;
>       GEM_BUG_ON(rb_first(&el->queue) != rb);
> -     while (rb) {
> +
> +     if (unlikely(!rb))
> +             goto done;
> +
> +     port = execlist_request_port(el);
> +
> +     do {
>               struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
>               struct drm_i915_gem_request *rq, *rn;
>  
>               list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
>                       if (last && rq->ctx != last->ctx) {
> -                             if (port == last_port) {
> +                             if (!execlist_inactive_ports(el)) {
>                                       __list_del_many(&p->requests,
>                                                       &rq->priotree.link);
>                                       goto done;
> @@ -587,7 +591,8 @@ static void i915_guc_dequeue(struct intel_engine_cs 
> *engine)
>                               if (submit)
>                                       port_assign(port, last);
>  
> -                             port = execlist_port_next(el, port);
> +                             port = execlist_request_port(el);
> +                             GEM_BUG_ON(port_isset(port));
>                       }
>  
>                       INIT_LIST_HEAD(&rq->priotree.link);
> @@ -604,7 +609,7 @@ static void i915_guc_dequeue(struct intel_engine_cs 
> *engine)
>               INIT_LIST_HEAD(&p->requests);
>               if (p->priority != I915_PRIORITY_NORMAL)
>                       kmem_cache_free(engine->i915->priorities, p);
> -     }
> +     } while (rb);
>  done:
>       el->first = rb;
>       if (submit) {
> @@ -618,21 +623,21 @@ static void i915_guc_irq_handler(unsigned long data)
>  {
>       struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
>       struct intel_engine_execlist * const el = &engine->execlist;
> -     struct execlist_port *port = execlist_port_head(el);
> -     const struct execlist_port * const last_port = execlist_port_tail(el);
> -     struct drm_i915_gem_request *rq;
>  
> -     rq = port_request(port);
> -     while (rq && i915_gem_request_completed(rq)) {
> +     while (execlist_active_ports(el)) {
> +             struct execlist_port *port = execlist_port_head(el);
> +             struct drm_i915_gem_request *rq = port_request(port);
> +
> +             if (!i915_gem_request_completed(rq))
> +                     break;
> +
>               trace_i915_gem_request_out(rq);
>               i915_gem_request_put(rq);
>  
> -             port = execlist_port_complete(el, port);
> -
> -             rq = port_request(port);
> +             execlist_release_port(el, port);
>       }
>  
> -     if (!port_isset(last_port))
> +     if (execlist_inactive_ports(el))
>               i915_guc_dequeue(engine);
>  }
>  
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index ac5a95439393..a9d888b726c4 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1342,7 +1342,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 
> iir, int test_shift)
>       bool tasklet = false;
>  
>       if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) {
> -             if (port_count(execlist_port_head(el))) {
> +             if (READ_ONCE(el->port_count)) {
>                       __set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
>                       tasklet = true;
>               }
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c 
> b/drivers/gpu/drm/i915/intel_engine_cs.c
> index b0d702063a50..29b170fdd6ef 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -407,6 +407,9 @@ static void intel_engine_init_execlist(struct 
> intel_engine_cs *engine)
>       BUILD_BUG_ON_NOT_POWER_OF_2(execlist_num_ports(&engine->execlist));
>       GEM_BUG_ON(execlist_num_ports(&engine->execlist) > EXECLIST_MAX_PORTS);
>  
> +     el->port_head = 0;
> +     el->port_count = 0;
> +
>       el->queue = RB_ROOT;
>       el->first = NULL;
>  }
> @@ -1501,8 +1504,8 @@ bool intel_engine_is_idle(struct intel_engine_cs 
> *engine)
>       if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
>               return false;
>  
> -     /* Both ports drained, no more ELSP submission? */
> -     if (port_request(execlist_port_head(&engine->execlist)))
> +     /* All ports drained, no more ELSP submission? */
> +     if (execlist_active_ports(&engine->execlist))
>               return false;
>  
>       /* ELSP is empty, but there are ready requests? */
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
> b/drivers/gpu/drm/i915/intel_lrc.c
> index 8550cd6635c9..bea10620bed2 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -399,26 +399,29 @@ static void execlists_submit_ports(struct 
> intel_engine_cs *engine)
>               engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine));
>       unsigned int n;
>  
> -     for (n = execlist_num_ports(el); n--; ) {
> -             struct execlist_port *port;
> +     for (n = 0; n < execlist_inactive_ports(el); n++) {
> +             writel(0, elsp);
> +             writel(0, elsp);
> +     }
> +
> +     for (n = execlist_active_ports(el); n--; ) {
>               struct drm_i915_gem_request *rq;
> +             struct execlist_port *port;
>               unsigned int count;
>               u64 desc;
>  
>               port = execlist_port_index(el, n);
> -
>               rq = port_unpack(port, &count);
> -             if (rq) {
> -                     GEM_BUG_ON(count > !n);
> -                     if (!count++)
> -                             execlists_context_status_change(rq, 
> INTEL_CONTEXT_SCHEDULE_IN);
> -                     port_set(port, port_pack(rq, count));
> -                     desc = execlists_update_context(rq);
> -                     GEM_DEBUG_EXEC(port->context_id = upper_32_bits(desc));
> -             } else {
> -                     GEM_BUG_ON(!n);
> -                     desc = 0;
> -             }
> +
> +             GEM_BUG_ON(!rq);
> +             GEM_BUG_ON(count > !n);
> +
> +             if (!count++)
> +                     execlists_context_status_change(rq, 
> INTEL_CONTEXT_SCHEDULE_IN);
> +
> +             port_set(port, port_pack(rq, count));
> +             desc = execlists_update_context(rq);
> +             GEM_DEBUG_EXEC(port->context_id = upper_32_bits(desc));
>  
>               writel(upper_32_bits(desc), elsp);
>               writel(lower_32_bits(desc), elsp);
> @@ -456,15 +459,23 @@ static void port_assign(struct execlist_port *port,
>  
>  static void execlists_dequeue(struct intel_engine_cs *engine)
>  {
> -     struct drm_i915_gem_request *last;
>       struct intel_engine_execlist * const el = &engine->execlist;
> -     struct execlist_port *port = execlist_port_head(el);
> -     const struct execlist_port * const last_port = execlist_port_tail(el);
> +     struct execlist_port *port;
> +     struct drm_i915_gem_request *last;
>       struct rb_node *rb;
>       bool submit = false;
>  
> -     last = port_request(port);
> -     if (last)
> +     spin_lock_irq(&engine->timeline->lock);
> +     rb = el->first;
> +     GEM_BUG_ON(rb_first(&el->queue) != rb);
> +
> +     if (unlikely(!rb))
> +             goto done;
> +
> +     if (execlist_active_ports(el)) {
> +             port = execlist_port_tail(el);
> +             last = port_request(port);
> +
>               /* WaIdleLiteRestore:bdw,skl
>                * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL
>                * as we resubmit the request. See gen8_emit_breadcrumb()
> @@ -472,6 +483,11 @@ static void execlists_dequeue(struct intel_engine_cs 
> *engine)
>                * request.
>                */
>               last->tail = last->wa_tail;
> +     } else {
> +             /* Allocate first port to coalesce into */
> +             port = execlist_request_port(el);
> +             last = NULL;
> +     }
>  
>       /* Hardware submission is through 2 ports. Conceptually each port
>        * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
> @@ -493,11 +509,7 @@ static void execlists_dequeue(struct intel_engine_cs 
> *engine)
>        * sequence of requests as being the most optimal (fewest wake ups
>        * and context switches) submission.
>        */
> -
> -     spin_lock_irq(&engine->timeline->lock);
> -     rb = el->first;
> -     GEM_BUG_ON(rb_first(&el->queue) != rb);
> -     while (rb) {
> +     do {
>               struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
>               struct drm_i915_gem_request *rq, *rn;
>  
> @@ -515,11 +527,11 @@ static void execlists_dequeue(struct intel_engine_cs 
> *engine)
>                        */
>                       if (last && !can_merge_ctx(rq->ctx, last->ctx)) {
>                               /*
> -                              * If we are on the second port and cannot
> +                              * If we are on the last port and cannot
>                                * combine this request with the last, then we
>                                * are done.
>                                */
> -                             if (port == last_port) {
> +                             if (!execlist_inactive_ports(el)) {
>                                       __list_del_many(&p->requests,
>                                                       &rq->priotree.link);
>                                       goto done;
> @@ -544,8 +556,7 @@ static void execlists_dequeue(struct intel_engine_cs 
> *engine)
>                               if (submit)
>                                       port_assign(port, last);
>  
> -                             port = execlist_port_next(el, port);
> -
> +                             port = execlist_request_port(el);
>                               GEM_BUG_ON(port_isset(port));
>                       }
>  
> @@ -563,7 +574,8 @@ static void execlists_dequeue(struct intel_engine_cs 
> *engine)
>               INIT_LIST_HEAD(&p->requests);
>               if (p->priority != I915_PRIORITY_NORMAL)
>                       kmem_cache_free(engine->i915->priorities, p);
> -     }
> +     } while (rb);
> +
>  done:
>       el->first = rb;
>       if (submit)
> @@ -582,6 +594,9 @@ static void execlist_cancel_port_requests(struct 
> intel_engine_execlist *el)
>               i915_gem_request_put(port_request(&el->port[i]));
>  
>       memset(el->port, 0, sizeof(el->port));
> +
> +     el->port_count = 0;
> +     el->port_head = 0;
>  }
>  
>  static void execlists_cancel_requests(struct intel_engine_cs *engine)
> @@ -643,10 +658,12 @@ static void execlists_cancel_requests(struct 
> intel_engine_cs *engine)
>  
>  static bool execlists_elsp_ready(struct intel_engine_execlist * const el)
>  {
> -     struct execlist_port * const port0 = execlist_port_head(el);
> -     struct execlist_port * const port1 = execlist_port_next(el, port0);
> +     const unsigned int active = execlist_active_ports(el);
> +
> +     if (!active)
> +             return true;
>  
> -     return port_count(port0) + port_count(port1) < 2;
> +     return port_count(execlist_port_tail(el)) + active < 2;
>  }
>  
>  /*
> @@ -657,7 +674,6 @@ static void intel_lrc_irq_handler(unsigned long data)
>  {
>       struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
>       struct intel_engine_execlist * const el = &engine->execlist;
> -     struct execlist_port *port = execlist_port_head(el);
>       struct drm_i915_private *dev_priv = engine->i915;
>  
>       /* We can skip acquiring intel_runtime_pm_get() here as it was taken
> @@ -714,6 +730,7 @@ static void intel_lrc_irq_handler(unsigned long data)
>               }
>  
>               while (head != tail) {
> +                     struct execlist_port *port;
>                       struct drm_i915_gem_request *rq;
>                       unsigned int status;
>                       unsigned int count;
> @@ -742,6 +759,7 @@ static void intel_lrc_irq_handler(unsigned long data)
>                       if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
>                               continue;
>  
> +                     port = execlist_port_head(el);
>                       /* Check the context/desc id for this event matches */
>                       GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
>  
> @@ -755,13 +773,13 @@ static void intel_lrc_irq_handler(unsigned long data)
>                               trace_i915_gem_request_out(rq);
>                               i915_gem_request_put(rq);
>  
> -                             port = execlist_port_complete(el, port);
> +                             execlist_release_port(el, port);
>                       } else {
>                               port_set(port, port_pack(rq, count));
>                       }
>  
>                       /* After the final element, the hw should be idle */
> -                     GEM_BUG_ON(port_count(port) == 0 &&
> +                     GEM_BUG_ON(execlist_active_ports(el) == 0 &&
>                                  !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
>               }
>  
> @@ -786,7 +804,7 @@ static void insert_request(struct intel_engine_cs *engine,
>       struct i915_priolist *p = lookup_priolist(engine, pt, prio);
>  
>       list_add_tail(&pt->link, &ptr_mask_bits(p, 1)->requests);
> -     if (ptr_unmask_bits(p, 1) && execlists_elsp_ready(el))
> +     if (ptr_unmask_bits(p, 1))
>               tasklet_hi_schedule(&el->irq_tasklet);
>  }
>  
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
> b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 991f6c0bd6c2..efa5a8ea1ecb 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -249,6 +249,11 @@ struct intel_engine_execlist {
>       unsigned int port_head;
>  
>       /**
> +      * @port_count: reserved ports
> +      */
> +     unsigned int port_count;
> +
> +     /**
>        * @queue: queue of requests, in priority lists
>        */
>       struct rb_root queue;
> @@ -529,14 +534,20 @@ execlist_num_ports(const struct intel_engine_execlist * 
> const el)
>       return el->port_mask + 1;
>  }
>  
> -#define __port_idx(start, index, mask) (((start) + (index)) & (mask))
> +static inline unsigned int
> +execlist_active_ports(const struct intel_engine_execlist * const el)
> +{
> +     return el->port_count;
> +}
>  
> -static inline struct execlist_port *
> -execlist_port_head(struct intel_engine_execlist * const el)
> +static inline unsigned int
> +execlist_inactive_ports(const struct intel_engine_execlist * const el)
>  {
> -     return &el->port[el->port_head];
> +     return execlist_num_ports(el) - execlist_active_ports(el);
>  }
>  
> +#define __port_idx(start, index, mask) (((start) + (index)) & (mask))
> +
>  /* Index starting from port_head */
>  static inline struct execlist_port *
>  execlist_port_index(struct intel_engine_execlist * const el,
> @@ -546,30 +557,46 @@ execlist_port_index(struct intel_engine_execlist * 
> const el,
>  }
>  
>  static inline struct execlist_port *
> -execlist_port_tail(struct intel_engine_execlist * const el)
> +execlist_port_head(struct intel_engine_execlist * const el)
>  {
> -     return &el->port[__port_idx(el->port_head, -1, el->port_mask)];
> +     GEM_BUG_ON(!el->port_count);
> +
> +     return execlist_port_index(el, 0);
>  }
>  
>  static inline struct execlist_port *
> -execlist_port_next(struct intel_engine_execlist * const el,
> -                const struct execlist_port * const port)
> +execlist_port_tail(struct intel_engine_execlist * const el)
>  {
> -     const unsigned int i = port_index(port, el);
> +     GEM_BUG_ON(!el->port_count);
>  
> -     return &el->port[__port_idx(i, 1, el->port_mask)];
> +     return execlist_port_index(el, el->port_count - 1);
>  }
>  
>  static inline struct execlist_port *
> -execlist_port_complete(struct intel_engine_execlist * const el,
> -                    struct execlist_port * const port)
> +execlist_request_port(struct intel_engine_execlist * const el)
>  {
> +     GEM_BUG_ON(el->port_count == el->port_mask + 1);
> +
> +     el->port_count++;
> +
> +     GEM_BUG_ON(port_isset(execlist_port_tail(el)));
> +
> +     return execlist_port_tail(el);
> +}
> +
> +static inline void
> +execlist_release_port(struct intel_engine_execlist * const el,
> +                   struct execlist_port * const port)
> +{
> +
>       GEM_BUG_ON(port_index(port, el) != el->port_head);
> +     GEM_BUG_ON(!port_isset(port));
> +     GEM_BUG_ON(!el->port_count);
>  
>       memset(port, 0, sizeof(struct execlist_port));
> -     el->port_head = __port_idx(el->port_head, 1, el->port_mask);
>  
> -     return execlist_port_head(el);
> +     el->port_head = __port_idx(el->port_head, 1, el->port_mask);
> +     el->port_count--;
>  }
>  
>  static inline unsigned int
> -- 
> 2.11.0
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to