[Intel-gfx] [PATCH 7/8] drm/i915/gt: Decouple inflight virtual engines

2020-05-18 Thread Chris Wilson
Once a virtual engine has been bound to a sibling, it will remain bound
until we finally schedule out the last active request. We cannot rebind
the context to a new sibling while it is inflight as the context save
will conflict, hence we wait. As we cannot then use any other sibling
while the context is inflight, only kick the bound sibling while it is
inflight, and upon scheduling out kick the rest (so that we can swap
engines on timeslicing if the previously bound engine becomes
oversubscribed).

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 30 +
 1 file changed, 13 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index fcb500f08eac..0710ddbcf85f 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1398,9 +1398,8 @@ execlists_schedule_in(struct i915_request *rq, int idx)
 static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
 {
struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
-   struct i915_request *next = READ_ONCE(ve->request);
 
-   if (next == rq || (next && next->execution_mask & ~rq->execution_mask))
+   if (READ_ONCE(ve->request))
tasklet_hi_schedule(&ve->base.execlists.tasklet);
 }
 
@@ -1821,18 +1820,14 @@ first_virtual_engine(struct intel_engine_cs *engine)
rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
struct i915_request *rq = READ_ONCE(ve->request);
 
-   if (!rq) { /* lazily cleanup after another engine handled rq */
+   /* lazily cleanup after another engine handled rq */
+   if (!rq || !virtual_matches(ve, rq, engine)) {
rb_erase_cached(rb, &el->virtual);
RB_CLEAR_NODE(rb);
rb = rb_first_cached(&el->virtual);
continue;
}
 
-   if (!virtual_matches(ve, rq, engine)) {
-   rb = rb_next(rb);
-   continue;
-   }
-
return ve;
}
 
@@ -5478,7 +5473,6 @@ static void virtual_submission_tasklet(unsigned long data)
if (unlikely(!mask))
return;
 
-   local_irq_disable();
for (n = 0; n < ve->num_siblings; n++) {
struct intel_engine_cs *sibling = READ_ONCE(ve->siblings[n]);
struct ve_node * const node = &ve->nodes[sibling->id];
@@ -5488,20 +5482,19 @@ static void virtual_submission_tasklet(unsigned long data)
if (!READ_ONCE(ve->request))
break; /* already handled by a sibling's tasklet */
 
+   spin_lock_irq(&sibling->active.lock);
+
if (unlikely(!(mask & sibling->mask))) {
if (!RB_EMPTY_NODE(&node->rb)) {
-   spin_lock(&sibling->active.lock);
rb_erase_cached(&node->rb,
&sibling->execlists.virtual);
RB_CLEAR_NODE(&node->rb);
-   spin_unlock(&sibling->active.lock);
}
-   continue;
-   }
 
-   spin_lock(&sibling->active.lock);
+   goto unlock_engine;
+   }
 
-   if (!RB_EMPTY_NODE(&node->rb)) {
+   if (unlikely(!RB_EMPTY_NODE(&node->rb))) {
/*
 * Cheat and avoid rebalancing the tree if we can
 * reuse this node in situ.
@@ -5543,9 +5536,12 @@ static void virtual_submission_tasklet(unsigned long data)
sibling->execlists.queue_priority_hint))
tasklet_hi_schedule(&sibling->execlists.tasklet);
 
-   spin_unlock(&sibling->active.lock);
+unlock_engine:
+   spin_unlock_irq(&sibling->active.lock);
+
+   if (intel_context_inflight(&ve->context))
+   break;
}
-   local_irq_enable();
 }
 
 static void virtual_submit_request(struct i915_request *rq)
-- 
2.20.1
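
The kick_siblings() change is the heart of the new policy and is easiest
to read side by side. A hand-written before/after sketch for
illustration (the hunk above is authoritative):

	/* Before: kick the virtual tasklet only if the completed request
	 * is still queued, or if the next request could run on a sibling
	 * other than the one we are bound to (its execution_mask has bits
	 * outside rq's).
	 */
	if (next == rq || (next && next->execution_mask & ~rq->execution_mask))
		tasklet_hi_schedule(&ve->base.execlists.tasklet);

	/* After: kick whenever any request is waiting. The submission
	 * tasklet now stops at the bound sibling while the context is
	 * inflight (the intel_context_inflight() break above) and only
	 * fans out to the remaining siblings once the context has been
	 * scheduled out, at which point timeslicing can migrate it.
	 */
	if (READ_ONCE(ve->request))
		tasklet_hi_schedule(&ve->base.execlists.tasklet);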



[Intel-gfx] [PATCH 7/8] drm/i915/gt: Decouple inflight virtual engines

2020-05-18 Thread Chris Wilson
Once a virtual engine has been bound to a sibling, it will remain bound
until we finally schedule out the last active request. We cannot rebind
the context to a new sibling while it is inflight as the context save
will conflict, hence we wait. As we cannot then use any other sibling
while the context is inflight, only kick the bound sibling while it is
inflight, and upon scheduling out kick the rest (so that we can swap
engines on timeslicing if the previously bound engine becomes
oversubscribed).

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 30 +
 1 file changed, 13 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 7a5ac3375225..fe8f3518d6b8 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1398,9 +1398,8 @@ execlists_schedule_in(struct i915_request *rq, int idx)
 static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
 {
struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
-   struct i915_request *next = READ_ONCE(ve->request);
 
-   if (next == rq || (next && next->execution_mask & ~rq->execution_mask))
+   if (READ_ONCE(ve->request))
tasklet_hi_schedule(&ve->base.execlists.tasklet);
 }
 
@@ -1821,18 +1820,14 @@ first_virtual_engine(struct intel_engine_cs *engine)
rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
struct i915_request *rq = READ_ONCE(ve->request);
 
-   if (!rq) { /* lazily cleanup after another engine handled rq */
+   /* lazily cleanup after another engine handled rq */
+   if (!rq || !virtual_matches(ve, rq, engine)) {
rb_erase_cached(rb, &el->virtual);
RB_CLEAR_NODE(rb);
rb = rb_first_cached(&el->virtual);
continue;
}
 
-   if (!virtual_matches(ve, rq, engine)) {
-   rb = rb_next(rb);
-   continue;
-   }
-
return ve;
}
 
@@ -5478,7 +5473,6 @@ static void virtual_submission_tasklet(unsigned long data)
if (unlikely(!mask))
return;
 
-   local_irq_disable();
for (n = 0; n < ve->num_siblings; n++) {
struct intel_engine_cs *sibling = READ_ONCE(ve->siblings[n]);
struct ve_node * const node = &ve->nodes[sibling->id];
@@ -5488,20 +5482,19 @@ static void virtual_submission_tasklet(unsigned long data)
if (!READ_ONCE(ve->request))
break; /* already handled by a sibling's tasklet */
 
+   spin_lock_irq(&sibling->active.lock);
+
if (unlikely(!(mask & sibling->mask))) {
if (!RB_EMPTY_NODE(&node->rb)) {
-   spin_lock(&sibling->active.lock);
rb_erase_cached(&node->rb,
&sibling->execlists.virtual);
RB_CLEAR_NODE(&node->rb);
-   spin_unlock(&sibling->active.lock);
}
-   continue;
-   }
 
-   spin_lock(&sibling->active.lock);
+   goto unlock_engine;
+   }
 
-   if (!RB_EMPTY_NODE(&node->rb)) {
+   if (unlikely(!RB_EMPTY_NODE(&node->rb))) {
/*
 * Cheat and avoid rebalancing the tree if we can
 * reuse this node in situ.
@@ -5541,9 +5534,12 @@ static void virtual_submission_tasklet(unsigned long data)
if (first && prio >= sibling->execlists.queue_priority_hint)
tasklet_hi_schedule(&sibling->execlists.tasklet);
 
-   spin_unlock(&sibling->active.lock);
+unlock_engine:
+   spin_unlock_irq(&sibling->active.lock);
+
+   if (intel_context_inflight(&ve->context))
+   break;
}
-   local_irq_enable();
 }
 
 static void virtual_submit_request(struct i915_request *rq)
-- 
2.20.1
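
The first_virtual_engine() hunk collapses the walk over the virtual
rb-tree to a single cleanup case. A condensed view of the resulting
loop, assembled by hand from the hunk above (declarations and locking
elided):

	rb = rb_first_cached(&el->virtual);
	while (rb) {
		struct virtual_engine *ve =
			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
		struct i915_request *rq = READ_ONCE(ve->request);

		/* A node with no request, or whose request no longer
		 * matches this engine, is now erased rather than skipped:
		 * while inflight the request is pinned to its bound
		 * sibling, so a non-matching node here is stale.
		 */
		if (!rq || !virtual_matches(ve, rq, engine)) {
			rb_erase_cached(rb, &el->virtual);
			RB_CLEAR_NODE(rb);
			rb = rb_first_cached(&el->virtual);
			continue;
		}

		return ve; /* first matching candidate */
	}

	return NULL;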



Re: [Intel-gfx] [PATCH 7/8] drm/i915/gt: Decouple inflight virtual engines

2020-05-18 Thread Tvrtko Ursulin



On 18/05/2020 09:14, Chris Wilson wrote:

Once a virtual engine has been bound to a sibling, it will remain bound
until we finally schedule out the last active request. We cannot rebind
the context to a new sibling while it is inflight as the context save
will conflict, hence we wait. As we cannot then use any other sibling
while the context is inflight, only kick the bound sibling while it is
inflight, and upon scheduling out kick the rest (so that we can swap
engines on timeslicing if the previously bound engine becomes
oversubscribed).

Signed-off-by: Chris Wilson 
---
  drivers/gpu/drm/i915/gt/intel_lrc.c | 30 +
  1 file changed, 13 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 7a5ac3375225..fe8f3518d6b8 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1398,9 +1398,8 @@ execlists_schedule_in(struct i915_request *rq, int idx)
  static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
  {
struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
-   struct i915_request *next = READ_ONCE(ve->request);
  
-	if (next == rq || (next && next->execution_mask & ~rq->execution_mask))

+   if (READ_ONCE(ve->request))
tasklet_hi_schedule(&ve->base.execlists.tasklet);
  }
  
@@ -1821,18 +1820,14 @@ first_virtual_engine(struct intel_engine_cs *engine)

rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
struct i915_request *rq = READ_ONCE(ve->request);
  
-		if (!rq) { /* lazily cleanup after another engine handled rq */

+   /* lazily cleanup after another engine handled rq */
+   if (!rq || !virtual_matches(ve, rq, engine)) {
rb_erase_cached(rb, &el->virtual);
RB_CLEAR_NODE(rb);
rb = rb_first_cached(&el->virtual);
continue;
}
  
-		if (!virtual_matches(ve, rq, engine)) {

-   rb = rb_next(rb);
-   continue;
-   }
-
return ve;
}
  
@@ -5478,7 +5473,6 @@ static void virtual_submission_tasklet(unsigned long data)

if (unlikely(!mask))
return;
  
-	local_irq_disable();

for (n = 0; n < ve->num_siblings; n++) {
struct intel_engine_cs *sibling = READ_ONCE(ve->siblings[n]);
struct ve_node * const node = &ve->nodes[sibling->id];
@@ -5488,20 +5482,19 @@ static void virtual_submission_tasklet(unsigned long data)
if (!READ_ONCE(ve->request))
break; /* already handled by a sibling's tasklet */
  
+		spin_lock_irq(&sibling->active.lock);

+
if (unlikely(!(mask & sibling->mask))) {
if (!RB_EMPTY_NODE(&node->rb)) {
-   spin_lock(&sibling->active.lock);
rb_erase_cached(&node->rb,
&sibling->execlists.virtual);
RB_CLEAR_NODE(&node->rb);
-   spin_unlock(&sibling->active.lock);
}
-   continue;
-   }
  
-		spin_lock(&sibling->active.lock);

+   goto unlock_engine;
+   }
  
-		if (!RB_EMPTY_NODE(&node->rb)) {

+   if (unlikely(!RB_EMPTY_NODE(&node->rb))) {
/*
 * Cheat and avoid rebalancing the tree if we can
 * reuse this node in situ.
@@ -5541,9 +5534,12 @@ static void virtual_submission_tasklet(unsigned long data)
if (first && prio >= sibling->execlists.queue_priority_hint)
tasklet_hi_schedule(&sibling->execlists.tasklet);
  
-		spin_unlock(&sibling->active.lock);

+unlock_engine:
+   spin_unlock_irq(&sibling->active.lock);
+
+   if (intel_context_inflight(&ve->context))
+   break;


So the virtual request may no longer be added to all siblings. Will it
still be possible to schedule it on any of them if timeslicing kicks in
under these conditions?


This is equivalent to the hunk in first_virtual_engine which also 
removes it from all other siblings.


I guess it's in line with what the commit message says - that a new
sibling will be picked upon timeslicing. I just don't quite see the
path which would do it. The only path which shuffles the siblings array
around is in dequeue, and dequeue on an engine other than the one which
first picked the request will not happen any more. I must be missing
something..


Regards,

Tvrtko


}
-   local_irq_enable();
  }
  
  static void virtual_submit_request(struct i915_request *rq)




Re: [Intel-gfx] [PATCH 7/8] drm/i915/gt: Decouple inflight virtual engines

2020-05-18 Thread Chris Wilson
Quoting Tvrtko Ursulin (2020-05-18 13:53:29)
> 
> On 18/05/2020 09:14, Chris Wilson wrote:
> > Once a virtual engine has been bound to a sibling, it will remain bound
> > until we finally schedule out the last active request. We cannot rebind
> > the context to a new sibling while it is inflight as the context save
> > will conflict, hence we wait. As we cannot then use any other sibling
> > while the context is inflight, only kick the bound sibling while it is
> > inflight, and upon scheduling out kick the rest (so that we can swap
> > engines on timeslicing if the previously bound engine becomes
> > oversubscribed).
> > 
> > Signed-off-by: Chris Wilson 
> > ---
> >   drivers/gpu/drm/i915/gt/intel_lrc.c | 30 +
> >   1 file changed, 13 insertions(+), 17 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
> > b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > index 7a5ac3375225..fe8f3518d6b8 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > @@ -1398,9 +1398,8 @@ execlists_schedule_in(struct i915_request *rq, int 
> > idx)
> >   static void kick_siblings(struct i915_request *rq, struct intel_context 
> > *ce)
> >   {
> >   struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
> > - struct i915_request *next = READ_ONCE(ve->request);
> >   
> > - if (next == rq || (next && next->execution_mask & 
> > ~rq->execution_mask))
> > + if (READ_ONCE(ve->request))
> >   tasklet_hi_schedule(&ve->base.execlists.tasklet);
> >   }
> >   
> > @@ -1821,18 +1820,14 @@ first_virtual_engine(struct intel_engine_cs *engine)
> >   rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
> >   struct i915_request *rq = READ_ONCE(ve->request);
> >   
> > - if (!rq) { /* lazily cleanup after another engine handled rq 
> > */
> > + /* lazily cleanup after another engine handled rq */
> > + if (!rq || !virtual_matches(ve, rq, engine)) {
> >   rb_erase_cached(rb, &el->virtual);
> >   RB_CLEAR_NODE(rb);
> >   rb = rb_first_cached(&el->virtual);
> >   continue;
> >   }
> >   
> > - if (!virtual_matches(ve, rq, engine)) {
> > - rb = rb_next(rb);
> > - continue;
> > - }
> > -
> >   return ve;
> >   }
> >   
> > @@ -5478,7 +5473,6 @@ static void virtual_submission_tasklet(unsigned long 
> > data)
> >   if (unlikely(!mask))
> >   return;
> >   
> > - local_irq_disable();
> >   for (n = 0; n < ve->num_siblings; n++) {
> >   struct intel_engine_cs *sibling = READ_ONCE(ve->siblings[n]);
> >   struct ve_node * const node = &ve->nodes[sibling->id];
> > @@ -5488,20 +5482,19 @@ static void virtual_submission_tasklet(unsigned 
> > long data)
> >   if (!READ_ONCE(ve->request))
> >   break; /* already handled by a sibling's tasklet */
> >   
> > + spin_lock_irq(&sibling->active.lock);
> > +
> >   if (unlikely(!(mask & sibling->mask))) {
> >   if (!RB_EMPTY_NODE(&node->rb)) {
> > - spin_lock(&sibling->active.lock);
> >   rb_erase_cached(&node->rb,
> >   &sibling->execlists.virtual);
> >   RB_CLEAR_NODE(&node->rb);
> > - spin_unlock(&sibling->active.lock);
> >   }
> > - continue;
> > - }
> >   
> > - spin_lock(&sibling->active.lock);
> > + goto unlock_engine;
> > + }
> >   
> > - if (!RB_EMPTY_NODE(&node->rb)) {
> > + if (unlikely(!RB_EMPTY_NODE(&node->rb))) {
> >   /*
> >* Cheat and avoid rebalancing the tree if we can
> >* reuse this node in situ.
> > @@ -5541,9 +5534,12 @@ static void virtual_submission_tasklet(unsigned long 
> > data)
> >   if (first && prio >= sibling->execlists.queue_priority_hint)
> >   tasklet_hi_schedule(&sibling->execlists.tasklet);
> >   
> > - spin_unlock(&sibling->active.lock);
> > +unlock_engine:
> > + spin_unlock_irq(&sibling->active.lock);
> > +
> > + if (intel_context_inflight(&ve->context))
> > + break;
> 
> So the virtual request may no longer be added to all siblings. Will it
> still be possible to schedule it on any of them if timeslicing kicks in
> under these conditions?

Yes.
 
> This is equivalent to the hunk in first_virtual_engine which also 
> removes it from all other siblings.
> 
> I guess it's in line with what the commit message says - that a new
> sibling will be picked upon timeslicing. I just don't quite

Re: [Intel-gfx] [PATCH 7/8] drm/i915/gt: Decouple inflight virtual engines

2020-05-18 Thread Tvrtko Ursulin



On 18/05/2020 14:00, Chris Wilson wrote:

Quoting Tvrtko Ursulin (2020-05-18 13:53:29)


On 18/05/2020 09:14, Chris Wilson wrote:

Once a virtual engine has been bound to a sibling, it will remain bound
until we finally schedule out the last active request. We cannot rebind
the context to a new sibling while it is inflight as the context save
will conflict, hence we wait. As we cannot then use any other sibling
while the context is inflight, only kick the bound sibling while it is
inflight, and upon scheduling out kick the rest (so that we can swap
engines on timeslicing if the previously bound engine becomes
oversubscribed).

Signed-off-by: Chris Wilson 
---
   drivers/gpu/drm/i915/gt/intel_lrc.c | 30 +
   1 file changed, 13 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 7a5ac3375225..fe8f3518d6b8 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1398,9 +1398,8 @@ execlists_schedule_in(struct i915_request *rq, int idx)
   static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
   {
   struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
- struct i915_request *next = READ_ONCE(ve->request);
   
- if (next == rq || (next && next->execution_mask & ~rq->execution_mask))

+ if (READ_ONCE(ve->request))
   tasklet_hi_schedule(&ve->base.execlists.tasklet);
   }
   
@@ -1821,18 +1820,14 @@ first_virtual_engine(struct intel_engine_cs *engine)

   rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
   struct i915_request *rq = READ_ONCE(ve->request);
   
- if (!rq) { /* lazily cleanup after another engine handled rq */

+ /* lazily cleanup after another engine handled rq */
+ if (!rq || !virtual_matches(ve, rq, engine)) {
   rb_erase_cached(rb, &el->virtual);
   RB_CLEAR_NODE(rb);
   rb = rb_first_cached(&el->virtual);
   continue;
   }
   
- if (!virtual_matches(ve, rq, engine)) {

- rb = rb_next(rb);
- continue;
- }
-
   return ve;
   }
   
@@ -5478,7 +5473,6 @@ static void virtual_submission_tasklet(unsigned long data)

   if (unlikely(!mask))
   return;
   
- local_irq_disable();

   for (n = 0; n < ve->num_siblings; n++) {
   struct intel_engine_cs *sibling = READ_ONCE(ve->siblings[n]);
   struct ve_node * const node = &ve->nodes[sibling->id];
@@ -5488,20 +5482,19 @@ static void virtual_submission_tasklet(unsigned long data)
   if (!READ_ONCE(ve->request))
   break; /* already handled by a sibling's tasklet */
   
+ spin_lock_irq(&sibling->active.lock);

+
   if (unlikely(!(mask & sibling->mask))) {
   if (!RB_EMPTY_NODE(&node->rb)) {
- spin_lock(&sibling->active.lock);
   rb_erase_cached(&node->rb,
   &sibling->execlists.virtual);
   RB_CLEAR_NODE(&node->rb);
- spin_unlock(&sibling->active.lock);
   }
- continue;
- }
   
- spin_lock(&sibling->active.lock);

+ goto unlock_engine;
+ }
   
- if (!RB_EMPTY_NODE(&node->rb)) {

+ if (unlikely(!RB_EMPTY_NODE(&node->rb))) {
   /*
* Cheat and avoid rebalancing the tree if we can
* reuse this node in situ.
@@ -5541,9 +5534,12 @@ static void virtual_submission_tasklet(unsigned long data)
   if (first && prio >= sibling->execlists.queue_priority_hint)
   tasklet_hi_schedule(&sibling->execlists.tasklet);
   
- spin_unlock(&sibling->active.lock);

+unlock_engine:
+ spin_unlock_irq(&sibling->active.lock);
+
+ if (intel_context_inflight(&ve->context))
+ break;


So the virtual request may no longer be added to all siblings. Will it
still be possible to schedule it on any of them if timeslicing kicks in
under these conditions?


Yes.
  

This is equivalent to the hunk in first_virtual_engine which also
removes it from all other siblings.

I guess it's in line with what the commit message says - that a new
sibling will be picked upon timeslicing. I just don't quite see the
path which would do it. The only path which shuffles the siblings array
around is in dequeue, and dequeue on an engine other than the one which
first picked the request will not happen any more. I must be missing
something..


It's all on the execlists_schedule_out. During timeslicing we call
unwind_incomplete_requests which moves the requests
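
Roughly, the path being described, as a hand-written sketch of the
schedule-out code of this era (not part of the patch; the
__execlists_schedule_out() helper and the ce->engine != engine test are
recalled from the intel_lrc.c of the period and should be treated as
assumptions):

	static inline void
	__execlists_schedule_out(struct i915_request *rq,
				 struct intel_engine_cs * const engine)
	{
		struct intel_context * const ce = rq->context;

		/* ... context save, runtime stats, etc. ... */

		/*
		 * Timeslicing unwinds the incomplete requests back onto
		 * the priority queue and eventually schedules the virtual
		 * context out here. Only now may it be rebound, so kick
		 * the virtual tasklet: after this patch it fires whenever
		 * ve->request is set, requeueing the request across every
		 * sibling in its execution_mask so that a less
		 * oversubscribed engine can claim it.
		 */
		if (ce->engine != engine) /* only true for virtual engines */
			kick_siblings(rq, ce);

		/* ... */
	}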

Re: [Intel-gfx] [PATCH 7/8] drm/i915/gt: Decouple inflight virtual engines

2020-05-18 Thread Chris Wilson
Quoting Tvrtko Ursulin (2020-05-18 15:55:46)
> 
> On 18/05/2020 14:00, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2020-05-18 13:53:29)
> >>
> >> On 18/05/2020 09:14, Chris Wilson wrote:
> >>> Once a virtual engine has been bound to a sibling, it will remain bound
> >>> until we finally schedule out the last active request. We cannot rebind
> >>> the context to a new sibling while it is inflight as the context save
> >>> will conflict, hence we wait. As we cannot then use any other sibling
> >>> while the context is inflight, only kick the bound sibling while it is
> >>> inflight, and upon scheduling out kick the rest (so that we can swap
> >>> engines on timeslicing if the previously bound engine becomes
> >>> oversubscribed).
> >>>
> >>> Signed-off-by: Chris Wilson 
> >>> ---
> >>>drivers/gpu/drm/i915/gt/intel_lrc.c | 30 +
> >>>1 file changed, 13 insertions(+), 17 deletions(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
> >>> b/drivers/gpu/drm/i915/gt/intel_lrc.c
> >>> index 7a5ac3375225..fe8f3518d6b8 100644
> >>> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> >>> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> >>> @@ -1398,9 +1398,8 @@ execlists_schedule_in(struct i915_request *rq, int 
> >>> idx)
> >>>static void kick_siblings(struct i915_request *rq, struct 
> >>> intel_context *ce)
> >>>{
> >>>struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
> >>> - struct i915_request *next = READ_ONCE(ve->request);
> >>>
> >>> - if (next == rq || (next && next->execution_mask & 
> >>> ~rq->execution_mask))
> >>> + if (READ_ONCE(ve->request))
> >>>tasklet_hi_schedule(&ve->base.execlists.tasklet);
> >>>}
> >>>
> >>> @@ -1821,18 +1820,14 @@ first_virtual_engine(struct intel_engine_cs 
> >>> *engine)
> >>>rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
> >>>struct i915_request *rq = READ_ONCE(ve->request);
> >>>
> >>> - if (!rq) { /* lazily cleanup after another engine handled 
> >>> rq */
> >>> + /* lazily cleanup after another engine handled rq */
> >>> + if (!rq || !virtual_matches(ve, rq, engine)) {
> >>>rb_erase_cached(rb, &el->virtual);
> >>>RB_CLEAR_NODE(rb);
> >>>rb = rb_first_cached(&el->virtual);
> >>>continue;
> >>>}
> >>>
> >>> - if (!virtual_matches(ve, rq, engine)) {
> >>> - rb = rb_next(rb);
> >>> - continue;
> >>> - }
> >>> -
> >>>return ve;
> >>>}
> >>>
> >>> @@ -5478,7 +5473,6 @@ static void virtual_submission_tasklet(unsigned 
> >>> long data)
> >>>if (unlikely(!mask))
> >>>return;
> >>>
> >>> - local_irq_disable();
> >>>for (n = 0; n < ve->num_siblings; n++) {
> >>>struct intel_engine_cs *sibling = 
> >>> READ_ONCE(ve->siblings[n]);
> >>>struct ve_node * const node = &ve->nodes[sibling->id];
> >>> @@ -5488,20 +5482,19 @@ static void virtual_submission_tasklet(unsigned 
> >>> long data)
> >>>if (!READ_ONCE(ve->request))
> >>>break; /* already handled by a sibling's tasklet */
> >>>
> >>> + spin_lock_irq(&sibling->active.lock);
> >>> +
> >>>if (unlikely(!(mask & sibling->mask))) {
> >>>if (!RB_EMPTY_NODE(&node->rb)) {
> >>> - spin_lock(&sibling->active.lock);
> >>>rb_erase_cached(&node->rb,
> >>>
> >>> &sibling->execlists.virtual);
> >>>RB_CLEAR_NODE(&node->rb);
> >>> - spin_unlock(&sibling->active.lock);
> >>>}
> >>> - continue;
> >>> - }
> >>>
> >>> - spin_lock(&sibling->active.lock);
> >>> + goto unlock_engine;
> >>> + }
> >>>
> >>> - if (!RB_EMPTY_NODE(&node->rb)) {
> >>> + if (unlikely(!RB_EMPTY_NODE(&node->rb))) {
> >>>/*
> >>> * Cheat and avoid rebalancing the tree if we can
> >>> * reuse this node in situ.
> >>> @@ -5541,9 +5534,12 @@ static void virtual_submission_tasklet(unsigned 
> >>> long data)
> >>>if (first && prio >= 
> >>> sibling->execlists.queue_priority_hint)
> >>>tasklet_hi_schedule(&sibling->execlists.tasklet);
> >>>
> >>> - spin_unlock(&sibling->active.lock);
> >>> +unlock_engine:
> >>> + spin_unlock_irq(&sibling->active.lock);
> >>> +
> >>> + if (intel_context_inflight(&ve->context))
> >>> + break;
> >>
> >> So virtual request may not be add

Re: [Intel-gfx] [PATCH 7/8] drm/i915/gt: Decouple inflight virtual engines

2020-05-18 Thread Chris Wilson
Quoting Chris Wilson (2020-05-18 16:40:15)
> Quoting Tvrtko Ursulin (2020-05-18 15:55:46)
> > 
> > On 18/05/2020 14:00, Chris Wilson wrote:
> > > Quoting Tvrtko Ursulin (2020-05-18 13:53:29)
> > >>
> > >> On 18/05/2020 09:14, Chris Wilson wrote:
> > >>> Once a virtual engine has been bound to a sibling, it will remain bound
> > >>> until we finally schedule out the last active request. We cannot rebind
> > >>> the context to a new sibling while it is inflight as the context save
> > >>> will conflict, hence we wait. As we cannot then use any other sibling
> > >>> while the context is inflight, only kick the bound sibling while it is
> > >>> inflight, and upon scheduling out kick the rest (so that we can swap
> > >>> engines on timeslicing if the previously bound engine becomes
> > >>> oversubscribed).
> > >>>
> > >>> Signed-off-by: Chris Wilson 
> > >>> ---
> > >>>drivers/gpu/drm/i915/gt/intel_lrc.c | 30 
> > >>> +
> > >>>1 file changed, 13 insertions(+), 17 deletions(-)
> > >>>
> > >>> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
> > >>> b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > >>> index 7a5ac3375225..fe8f3518d6b8 100644
> > >>> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> > >>> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > >>> @@ -1398,9 +1398,8 @@ execlists_schedule_in(struct i915_request *rq, 
> > >>> int idx)
> > >>>static void kick_siblings(struct i915_request *rq, struct 
> > >>> intel_context *ce)
> > >>>{
> > >>>struct virtual_engine *ve = container_of(ce, typeof(*ve), 
> > >>> context);
> > >>> - struct i915_request *next = READ_ONCE(ve->request);
> > >>>
> > >>> - if (next == rq || (next && next->execution_mask & 
> > >>> ~rq->execution_mask))
> > >>> + if (READ_ONCE(ve->request))
> > >>>tasklet_hi_schedule(&ve->base.execlists.tasklet);
> > >>>}
> > >>>
> > >>> @@ -1821,18 +1820,14 @@ first_virtual_engine(struct intel_engine_cs 
> > >>> *engine)
> > >>>rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
> > >>>struct i915_request *rq = READ_ONCE(ve->request);
> > >>>
> > >>> - if (!rq) { /* lazily cleanup after another engine handled 
> > >>> rq */
> > >>> + /* lazily cleanup after another engine handled rq */
> > >>> + if (!rq || !virtual_matches(ve, rq, engine)) {
> > >>>rb_erase_cached(rb, &el->virtual);
> > >>>RB_CLEAR_NODE(rb);
> > >>>rb = rb_first_cached(&el->virtual);
> > >>>continue;
> > >>>}
> > >>>
> > >>> - if (!virtual_matches(ve, rq, engine)) {
> > >>> - rb = rb_next(rb);
> > >>> - continue;
> > >>> - }
> > >>> -
> > >>>return ve;
> > >>>}
> > >>>
> > >>> @@ -5478,7 +5473,6 @@ static void virtual_submission_tasklet(unsigned 
> > >>> long data)
> > >>>if (unlikely(!mask))
> > >>>return;
> > >>>
> > >>> - local_irq_disable();
> > >>>for (n = 0; n < ve->num_siblings; n++) {
> > >>>struct intel_engine_cs *sibling = 
> > >>> READ_ONCE(ve->siblings[n]);
> > >>>struct ve_node * const node = &ve->nodes[sibling->id];
> > >>> @@ -5488,20 +5482,19 @@ static void virtual_submission_tasklet(unsigned 
> > >>> long data)
> > >>>if (!READ_ONCE(ve->request))
> > >>>break; /* already handled by a sibling's tasklet 
> > >>> */
> > >>>
> > >>> + spin_lock_irq(&sibling->active.lock);
> > >>> +
> > >>>if (unlikely(!(mask & sibling->mask))) {
> > >>>if (!RB_EMPTY_NODE(&node->rb)) {
> > >>> - spin_lock(&sibling->active.lock);
> > >>>rb_erase_cached(&node->rb,
> > >>>
> > >>> &sibling->execlists.virtual);
> > >>>RB_CLEAR_NODE(&node->rb);
> > >>> - spin_unlock(&sibling->active.lock);
> > >>>}
> > >>> - continue;
> > >>> - }
> > >>>
> > >>> - spin_lock(&sibling->active.lock);
> > >>> + goto unlock_engine;
> > >>> + }
> > >>>
> > >>> - if (!RB_EMPTY_NODE(&node->rb)) {
> > >>> + if (unlikely(!RB_EMPTY_NODE(&node->rb))) {
> > >>>/*
> > >>> * Cheat and avoid rebalancing the tree if we can
> > >>> * reuse this node in situ.
> > >>> @@ -5541,9 +5534,12 @@ static void virtual_submission_tasklet(unsigned 
> > >>> long data)
> > >>>if (first && prio >= 
> > >>> sibling->execlists.queue_priority_hint)
> > >>>tasklet_hi_schedule(&sibling->execlists.tasklet);
> > >>>
> >