[Intel-gfx] [PATCH 13/27] drm/i915/guc: Take context ref when cancelling request

2021-08-25 Thread Matthew Brost
A context can get destroyed after cancelling a request, if a context or
GT reset occurs, so take a reference to context when cancelling a
request.

Fixes: 62eaf0ae217d ("drm/i915/guc: Support request cancellation")
Signed-off-by: Matthew Brost 
Reviewed-by: Daniele Ceraolo Spurio 
---
 drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 5844bb954922..1cb97e98871c 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1630,8 +1630,10 @@ static void guc_context_cancel_request(struct 
intel_context *ce,
   struct i915_request *rq)
 {
if (i915_sw_fence_signaled(&rq->submit)) {
-   struct i915_sw_fence *fence = guc_context_block(ce);
+   struct i915_sw_fence *fence;
 
+   intel_context_get(ce);
+   fence = guc_context_block(ce);
i915_sw_fence_wait(fence);
if (!i915_request_completed(rq)) {
__i915_request_skip(rq);
@@ -1646,6 +1648,7 @@ static void guc_context_cancel_request(struct 
intel_context *ce,
flush_work(&ce_to_guc(ce)->ct.requests.worker);
 
guc_context_unblock(ce);
+   intel_context_put(ce);
}
 }
 
-- 
2.32.0



Re: [Intel-gfx] [PATCH 13/27] drm/i915/guc: Take context ref when cancelling request

2021-08-24 Thread Daniele Ceraolo Spurio




On 8/24/2021 8:42 AM, Matthew Brost wrote:

On Fri, Aug 20, 2021 at 05:07:27PM -0700, Daniele Ceraolo Spurio wrote:


On 8/18/2021 11:16 PM, Matthew Brost wrote:

A context can get destroyed after cancelling a request so take a
reference to context when cancelling a request.

What's the exact race? AFAICS __i915_request_skip does not have a
context_put().

This commit message isn't quite right, it is really a context reset or a
GT reset which could result in the context getting destroyed. I haven't
actually seen this happen but this just being paranoid about ref
counting. Can fix up the commit message.


ok, with an updated commit message:

Reviewed-by: Daniele Ceraolo Spurio 

Daniele



Matt


Daniele


Fixes: 62eaf0ae217d ("drm/i915/guc: Support request cancellation")
Signed-off-by: Matthew Brost 
---
   drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 5 -
   1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index e0e85e4ad512..85f96d325048 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1620,8 +1620,10 @@ static void guc_context_cancel_request(struct 
intel_context *ce,
   struct i915_request *rq)
   {
if (i915_sw_fence_signaled(&rq->submit)) {
-   struct i915_sw_fence *fence = guc_context_block(ce);
+   struct i915_sw_fence *fence;
+   intel_context_get(ce);
+   fence = guc_context_block(ce);
i915_sw_fence_wait(fence);
if (!i915_request_completed(rq)) {
__i915_request_skip(rq);
@@ -1636,6 +1638,7 @@ static void guc_context_cancel_request(struct 
intel_context *ce,
flush_work(&ce_to_guc(ce)->ct.requests.worker);
guc_context_unblock(ce);
+   intel_context_put(ce);
}
   }




Re: [Intel-gfx] [PATCH 13/27] drm/i915/guc: Take context ref when cancelling request

2021-08-24 Thread Matthew Brost
On Fri, Aug 20, 2021 at 05:07:27PM -0700, Daniele Ceraolo Spurio wrote:
> 
> 
> On 8/18/2021 11:16 PM, Matthew Brost wrote:
> > A context can get destroyed after cancelling a request so take a
> > reference to context when cancelling a request.
> 
> What's the exact race? AFAICS __i915_request_skip does not have a
> context_put().

This commit message isn't quite right, it is really a context reset or a
GT reset which could result in the context getting destroyed. I haven't
actually seen this happen but this just being paranoid about ref
counting. Can fix up the commit message.

Matt

> 
> Daniele
> 
> > 
> > Fixes: 62eaf0ae217d ("drm/i915/guc: Support request cancellation")
> > Signed-off-by: Matthew Brost 
> > ---
> >   drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 5 -
> >   1 file changed, 4 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
> > b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > index e0e85e4ad512..85f96d325048 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > @@ -1620,8 +1620,10 @@ static void guc_context_cancel_request(struct 
> > intel_context *ce,
> >struct i915_request *rq)
> >   {
> > if (i915_sw_fence_signaled(&rq->submit)) {
> > -   struct i915_sw_fence *fence = guc_context_block(ce);
> > +   struct i915_sw_fence *fence;
> > +   intel_context_get(ce);
> > +   fence = guc_context_block(ce);
> > i915_sw_fence_wait(fence);
> > if (!i915_request_completed(rq)) {
> > __i915_request_skip(rq);
> > @@ -1636,6 +1638,7 @@ static void guc_context_cancel_request(struct 
> > intel_context *ce,
> > flush_work(&ce_to_guc(ce)->ct.requests.worker);
> > guc_context_unblock(ce);
> > +   intel_context_put(ce);
> > }
> >   }
> 


Re: [Intel-gfx] [PATCH 13/27] drm/i915/guc: Take context ref when cancelling request

2021-08-20 Thread Daniele Ceraolo Spurio




On 8/18/2021 11:16 PM, Matthew Brost wrote:

A context can get destroyed after cancelling a request so take a
reference to context when cancelling a request.


What's the exact race? AFAICS __i915_request_skip does not have a 
context_put().


Daniele



Fixes: 62eaf0ae217d ("drm/i915/guc: Support request cancellation")
Signed-off-by: Matthew Brost 
---
  drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 5 -
  1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index e0e85e4ad512..85f96d325048 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1620,8 +1620,10 @@ static void guc_context_cancel_request(struct 
intel_context *ce,
   struct i915_request *rq)
  {
if (i915_sw_fence_signaled(&rq->submit)) {
-   struct i915_sw_fence *fence = guc_context_block(ce);
+   struct i915_sw_fence *fence;
  
+		intel_context_get(ce);

+   fence = guc_context_block(ce);
i915_sw_fence_wait(fence);
if (!i915_request_completed(rq)) {
__i915_request_skip(rq);
@@ -1636,6 +1638,7 @@ static void guc_context_cancel_request(struct 
intel_context *ce,
flush_work(&ce_to_guc(ce)->ct.requests.worker);
  
  		guc_context_unblock(ce);

+   intel_context_put(ce);
}
  }
  




[Intel-gfx] [PATCH 13/27] drm/i915/guc: Take context ref when cancelling request

2021-08-18 Thread Matthew Brost
A context can get destroyed after cancelling a request so take a
reference to context when cancelling a request.

Fixes: 62eaf0ae217d ("drm/i915/guc: Support request cancellation")
Signed-off-by: Matthew Brost 
---
 drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index e0e85e4ad512..85f96d325048 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1620,8 +1620,10 @@ static void guc_context_cancel_request(struct 
intel_context *ce,
   struct i915_request *rq)
 {
if (i915_sw_fence_signaled(&rq->submit)) {
-   struct i915_sw_fence *fence = guc_context_block(ce);
+   struct i915_sw_fence *fence;
 
+   intel_context_get(ce);
+   fence = guc_context_block(ce);
i915_sw_fence_wait(fence);
if (!i915_request_completed(rq)) {
__i915_request_skip(rq);
@@ -1636,6 +1638,7 @@ static void guc_context_cancel_request(struct 
intel_context *ce,
flush_work(&ce_to_guc(ce)->ct.requests.worker);
 
guc_context_unblock(ce);
+   intel_context_put(ce);
}
 }
 
-- 
2.32.0