Re: [Intel-gfx] [PATCH 24/26] drm/i915: Update I915_GEM_BUSY IOCTL to understand composite fences
On Tue, Oct 12, 2021 at 08:53:25AM +0100, Tvrtko Ursulin wrote: > > On 04/10/2021 23:06, Matthew Brost wrote: > > Parallel submission create composite fences (dma_fence_array) for excl / > > shared slots in objects. The I915_GEM_BUSY IOCTL checks these slots to > > determine the busyness of the object. Prior to patch it only check if > > the fence in the slot was a i915_request. Update the check to understand > > composite fences and correctly report the busyness. > > > > Signed-off-by: Matthew Brost > > --- > > drivers/gpu/drm/i915/gem/i915_gem_busy.c | 60 +++ > > .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 5 +- > > drivers/gpu/drm/i915/i915_request.h | 6 ++ > > 3 files changed, 58 insertions(+), 13 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c > > b/drivers/gpu/drm/i915/gem/i915_gem_busy.c > > index 6234e17259c1..b89d173c62eb 100644 > > --- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c > > @@ -4,6 +4,8 @@ > >* Copyright © 2014-2016 Intel Corporation > >*/ > > +#include > > + > > #include "gt/intel_engine.h" > > #include "i915_gem_ioctls.h" > > @@ -36,7 +38,7 @@ static __always_inline u32 __busy_write_id(u16 id) > > } > > static __always_inline unsigned int > > -__busy_set_if_active(const struct dma_fence *fence, u32 (*flag)(u16 id)) > > +__busy_set_if_active(struct dma_fence *fence, u32 (*flag)(u16 id)) > > { > > const struct i915_request *rq; > > @@ -46,29 +48,63 @@ __busy_set_if_active(const struct dma_fence *fence, u32 > > (*flag)(u16 id)) > > * to eventually flush us, but to minimise latency just ask the > > * hardware. > > * > > -* Note we only report on the status of native fences. > > +* Note we only report on the status of native fences and we currently > > +* have two native fences: > > +* > > +* 1. A composite fence (dma_fence_array) constructed of i915 requests > > +* created during a parallel submission. 
In this case we deconstruct the > > +* composite fence into individual i915 requests and check the status of > > +* each request. > > +* > > +* 2. A single i915 request. > > */ > > - if (!dma_fence_is_i915(fence)) > > + if (dma_fence_is_array(fence)) { > > + struct dma_fence_array *array = to_dma_fence_array(fence); > > + struct dma_fence **child = array->fences; > > + unsigned int nchild = array->num_fences; > > + > > + do { > > + struct dma_fence *current_fence = *child++; > > + > > + /* Not an i915 fence, can't be busy per above */ > > + if (!dma_fence_is_i915(current_fence) || > > + !test_bit(I915_FENCE_FLAG_COMPOSITE, > > + _fence->flags)) { > > + return 0; > > + } > > + > > + rq = to_request(current_fence); > > + if (!i915_request_completed(rq)) { > > + BUILD_BUG_ON(!typecheck(u16, > > + > > rq->engine->uabi_class)); > > + return flag(rq->engine->uabi_class); > > + } > > + } while (--nchild); > > Do you even need to introduce I915_FENCE_FLAG_COMPOSITE? If parallel submit > is the only possible creator of array fences then possibly not. Probably > even would result in less code which even keeps working in a hypothetical > future. Otherwise you could add a debug bug on if array fence contains a > fence without I915_FENCE_FLAG_COMPOSITE set. > Certainly other drivers can create a dma fence array and in theory could include a i915_request in that array. Adding this flag makes it clear that this fence was created by i915 for parallel submission and future proofs this code. > Secondly, I'd also run the whole loop and not return on first busy or > incompatible for simplicity. > I disagree. Short circuiting when a condition is found is pretty standard and not hard to understand. > And finally, with all above in place, I think you could have common function > for the below (checking one fence) and call that both for a single fence and > from an array loop above for less duplication. (Even duplicated BUILD_BUG_ON > which makes no sense!) 
> Yea duplicating the BUILD_BUG_ON doesn't make a ton of sense. Will remove. Disagree on the helper, the code paths are different enough to just open code this. Matt > End result would be a simpler patch like: > > __busy_set_if_active_one(...) > { >.. existing __busy_set_if_active .. > } > > __busy_set_if_active(..) > { > ... > if (dma_fence_is_array(fence)) { > ... > for (i = 0; i < array->num_fences; i++) > flags |= __busy_set_if_active_one(...); > } else { > flags = __busy_set_if_active_one(...); > } > > Regards, > > Tvrtko > > > + > > +
Re: [Intel-gfx] [PATCH 24/26] drm/i915: Update I915_GEM_BUSY IOCTL to understand composite fences
On 04/10/2021 23:06, Matthew Brost wrote: Parallel submission create composite fences (dma_fence_array) for excl / shared slots in objects. The I915_GEM_BUSY IOCTL checks these slots to determine the busyness of the object. Prior to patch it only check if the fence in the slot was a i915_request. Update the check to understand composite fences and correctly report the busyness. Signed-off-by: Matthew Brost --- drivers/gpu/drm/i915/gem/i915_gem_busy.c | 60 +++ .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 5 +- drivers/gpu/drm/i915/i915_request.h | 6 ++ 3 files changed, 58 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c index 6234e17259c1..b89d173c62eb 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c @@ -4,6 +4,8 @@ * Copyright © 2014-2016 Intel Corporation */ +#include + #include "gt/intel_engine.h" #include "i915_gem_ioctls.h" @@ -36,7 +38,7 @@ static __always_inline u32 __busy_write_id(u16 id) } static __always_inline unsigned int -__busy_set_if_active(const struct dma_fence *fence, u32 (*flag)(u16 id)) +__busy_set_if_active(struct dma_fence *fence, u32 (*flag)(u16 id)) { const struct i915_request *rq; @@ -46,29 +48,63 @@ __busy_set_if_active(const struct dma_fence *fence, u32 (*flag)(u16 id)) * to eventually flush us, but to minimise latency just ask the * hardware. * -* Note we only report on the status of native fences. +* Note we only report on the status of native fences and we currently +* have two native fences: +* +* 1. A composite fence (dma_fence_array) constructed of i915 requests +* created during a parallel submission. In this case we deconstruct the +* composite fence into individual i915 requests and check the status of +* each request. +* +* 2. A single i915 request. 
*/ - if (!dma_fence_is_i915(fence)) + if (dma_fence_is_array(fence)) { + struct dma_fence_array *array = to_dma_fence_array(fence); + struct dma_fence **child = array->fences; + unsigned int nchild = array->num_fences; + + do { + struct dma_fence *current_fence = *child++; + + /* Not an i915 fence, can't be busy per above */ + if (!dma_fence_is_i915(current_fence) || + !test_bit(I915_FENCE_FLAG_COMPOSITE, + _fence->flags)) { + return 0; + } + + rq = to_request(current_fence); + if (!i915_request_completed(rq)) { + BUILD_BUG_ON(!typecheck(u16, + rq->engine->uabi_class)); + return flag(rq->engine->uabi_class); + } + } while (--nchild); Do you even need to introduce I915_FENCE_FLAG_COMPOSITE? If parallel submit is the only possible creator of array fences then possibly not. Probably even would result in less code which even keeps working in a hypothetical future. Otherwise you could add a debug bug on if array fence contains a fence without I915_FENCE_FLAG_COMPOSITE set. Secondly, I'd also run the whole loop and not return on first busy or incompatible for simplicity. And finally, with all above in place, I think you could have common function for the below (checking one fence) and call that both for a single fence and from an array loop above for less duplication. (Even duplicated BUILD_BUG_ON which makes no sense!) End result would be a simpler patch like: __busy_set_if_active_one(...) { .. existing __busy_set_if_active .. } __busy_set_if_active(..) { ... if (dma_fence_is_array(fence)) { ... 
for (i = 0; i < array->num_fences; i++) flags |= __busy_set_if_active_one(...); } else { flags = __busy_set_if_active_one(...); } Regards, Tvrtko + + /* All requests in array complete, not busy */ return 0; + } else { + if (!dma_fence_is_i915(fence)) + return 0; - /* opencode to_request() in order to avoid const warnings */ - rq = container_of(fence, const struct i915_request, fence); - if (i915_request_completed(rq)) - return 0; + rq = to_request(fence); + if (i915_request_completed(rq)) + return 0; - /* Beware type-expansion follies! */ - BUILD_BUG_ON(!typecheck(u16, rq->engine->uabi_class)); - return flag(rq->engine->uabi_class); + /* Beware type-expansion follies! */ + BUILD_BUG_ON(!typecheck(u16,
Re: [Intel-gfx] [PATCH 24/26] drm/i915: Update I915_GEM_BUSY IOCTL to understand composite fences
On 10/4/2021 3:06 PM, Matthew Brost wrote: Parallel submission create composite fences (dma_fence_array) for excl / shared slots in objects. The I915_GEM_BUSY IOCTL checks these slots to determine the busyness of the object. Prior to patch it only check if the fence in the slot was a i915_request. Update the check to understand composite fences and correctly report the busyness. Signed-off-by: Matthew Brost Reviewed-by: Daniele Ceraolo Spurio Daniele --- drivers/gpu/drm/i915/gem/i915_gem_busy.c | 60 +++ .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 5 +- drivers/gpu/drm/i915/i915_request.h | 6 ++ 3 files changed, 58 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c index 6234e17259c1..b89d173c62eb 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c @@ -4,6 +4,8 @@ * Copyright © 2014-2016 Intel Corporation */ +#include + #include "gt/intel_engine.h" #include "i915_gem_ioctls.h" @@ -36,7 +38,7 @@ static __always_inline u32 __busy_write_id(u16 id) } static __always_inline unsigned int -__busy_set_if_active(const struct dma_fence *fence, u32 (*flag)(u16 id)) +__busy_set_if_active(struct dma_fence *fence, u32 (*flag)(u16 id)) { const struct i915_request *rq; @@ -46,29 +48,63 @@ __busy_set_if_active(const struct dma_fence *fence, u32 (*flag)(u16 id)) * to eventually flush us, but to minimise latency just ask the * hardware. * -* Note we only report on the status of native fences. +* Note we only report on the status of native fences and we currently +* have two native fences: +* +* 1. A composite fence (dma_fence_array) constructed of i915 requests +* created during a parallel submission. In this case we deconstruct the +* composite fence into individual i915 requests and check the status of +* each request. +* +* 2. A single i915 request. 
*/ - if (!dma_fence_is_i915(fence)) + if (dma_fence_is_array(fence)) { + struct dma_fence_array *array = to_dma_fence_array(fence); + struct dma_fence **child = array->fences; + unsigned int nchild = array->num_fences; + + do { + struct dma_fence *current_fence = *child++; + + /* Not an i915 fence, can't be busy per above */ + if (!dma_fence_is_i915(current_fence) || + !test_bit(I915_FENCE_FLAG_COMPOSITE, + &current_fence->flags)) { + return 0; + } + + rq = to_request(current_fence); + if (!i915_request_completed(rq)) { + BUILD_BUG_ON(!typecheck(u16, + rq->engine->uabi_class)); + return flag(rq->engine->uabi_class); + } + } while (--nchild); + + /* All requests in array complete, not busy */ return 0; + } else { + if (!dma_fence_is_i915(fence)) + return 0; - /* opencode to_request() in order to avoid const warnings */ - rq = container_of(fence, const struct i915_request, fence); - if (i915_request_completed(rq)) - return 0; + rq = to_request(fence); + if (i915_request_completed(rq)) + return 0; - /* Beware type-expansion follies! */ - BUILD_BUG_ON(!typecheck(u16, rq->engine->uabi_class)); - return flag(rq->engine->uabi_class); + /* Beware type-expansion follies! 
*/ + BUILD_BUG_ON(!typecheck(u16, rq->engine->uabi_class)); + return flag(rq->engine->uabi_class); + } } static __always_inline unsigned int -busy_check_reader(const struct dma_fence *fence) +busy_check_reader(struct dma_fence *fence) { return __busy_set_if_active(fence, __busy_read_flag); } static __always_inline unsigned int -busy_check_writer(const struct dma_fence *fence) +busy_check_writer(struct dma_fence *fence) { if (!fence) return 0; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 5c7fb6f68bbb..16276f406fd6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -2988,8 +2988,11 @@ eb_composite_fence_create(struct i915_execbuffer *eb, int out_fence_fd) if (!fences) return ERR_PTR(-ENOMEM); - for_each_batch_create_order(eb, i) + for_each_batch_create_order(eb, i) { fences[i] = &eb->requests[i]->fence; +
[Intel-gfx] [PATCH 24/26] drm/i915: Update I915_GEM_BUSY IOCTL to understand composite fences
Parallel submission create composite fences (dma_fence_array) for excl / shared slots in objects. The I915_GEM_BUSY IOCTL checks these slots to determine the busyness of the object. Prior to patch it only check if the fence in the slot was a i915_request. Update the check to understand composite fences and correctly report the busyness. Signed-off-by: Matthew Brost --- drivers/gpu/drm/i915/gem/i915_gem_busy.c | 60 +++ .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 5 +- drivers/gpu/drm/i915/i915_request.h | 6 ++ 3 files changed, 58 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c index 6234e17259c1..b89d173c62eb 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c @@ -4,6 +4,8 @@ * Copyright © 2014-2016 Intel Corporation */ +#include + #include "gt/intel_engine.h" #include "i915_gem_ioctls.h" @@ -36,7 +38,7 @@ static __always_inline u32 __busy_write_id(u16 id) } static __always_inline unsigned int -__busy_set_if_active(const struct dma_fence *fence, u32 (*flag)(u16 id)) +__busy_set_if_active(struct dma_fence *fence, u32 (*flag)(u16 id)) { const struct i915_request *rq; @@ -46,29 +48,63 @@ __busy_set_if_active(const struct dma_fence *fence, u32 (*flag)(u16 id)) * to eventually flush us, but to minimise latency just ask the * hardware. * -* Note we only report on the status of native fences. +* Note we only report on the status of native fences and we currently +* have two native fences: +* +* 1. A composite fence (dma_fence_array) constructed of i915 requests +* created during a parallel submission. In this case we deconstruct the +* composite fence into individual i915 requests and check the status of +* each request. +* +* 2. A single i915 request. 
*/ - if (!dma_fence_is_i915(fence)) + if (dma_fence_is_array(fence)) { + struct dma_fence_array *array = to_dma_fence_array(fence); + struct dma_fence **child = array->fences; + unsigned int nchild = array->num_fences; + + do { + struct dma_fence *current_fence = *child++; + + /* Not an i915 fence, can't be busy per above */ + if (!dma_fence_is_i915(current_fence) || + !test_bit(I915_FENCE_FLAG_COMPOSITE, + &current_fence->flags)) { + return 0; + } + + rq = to_request(current_fence); + if (!i915_request_completed(rq)) { + BUILD_BUG_ON(!typecheck(u16, + rq->engine->uabi_class)); + return flag(rq->engine->uabi_class); + } + } while (--nchild); + + /* All requests in array complete, not busy */ return 0; + } else { + if (!dma_fence_is_i915(fence)) + return 0; - /* opencode to_request() in order to avoid const warnings */ - rq = container_of(fence, const struct i915_request, fence); - if (i915_request_completed(rq)) - return 0; + rq = to_request(fence); + if (i915_request_completed(rq)) + return 0; - /* Beware type-expansion follies! */ - BUILD_BUG_ON(!typecheck(u16, rq->engine->uabi_class)); - return flag(rq->engine->uabi_class); + /* Beware type-expansion follies! 
*/ + BUILD_BUG_ON(!typecheck(u16, rq->engine->uabi_class)); + return flag(rq->engine->uabi_class); + } } static __always_inline unsigned int -busy_check_reader(const struct dma_fence *fence) +busy_check_reader(struct dma_fence *fence) { return __busy_set_if_active(fence, __busy_read_flag); } static __always_inline unsigned int -busy_check_writer(const struct dma_fence *fence) +busy_check_writer(struct dma_fence *fence) { if (!fence) return 0; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 5c7fb6f68bbb..16276f406fd6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -2988,8 +2988,11 @@ eb_composite_fence_create(struct i915_execbuffer *eb, int out_fence_fd) if (!fences) return ERR_PTR(-ENOMEM); - for_each_batch_create_order(eb, i) + for_each_batch_create_order(eb, i) { fences[i] = &eb->requests[i]->fence; + __set_bit(I915_FENCE_FLAG_COMPOSITE, + &eb->requests[i]->fence.flags); + }