Re: [Intel-gfx] [PATCH 21/26] drm/i915: Multi-BB execbuf
On Tue, Oct 12, 2021 at 02:22:41PM -0700, John Harrison wrote: > On 10/4/2021 15:06, Matthew Brost wrote: > > Allow multiple batch buffers to be submitted in a single execbuf IOCTL > > after a context has been configured with the 'set_parallel' extension. > > The number batches is implicit based on the contexts configuration. > > > > This is implemented with a series of loops. First a loop is used to find > > all the batches, a loop to pin all the HW contexts, a loop to create all > > the requests, a loop to submit (emit BB start, etc...) all the requests, > > a loop to tie the requests to the VMAs they touch, and finally a loop to > > commit the requests to the backend. > > > > A composite fence is also created for the generated requests to return > > to the user and to stick in dma resv slots. > > > > No behavior from the existing IOCTL should be changed aside from when > > throttling because the ring for a context is full, wait on the request > throttling because the ring for -> throttling the ring because > > full, wait -> full. In this situation, i915 will now wait > Yep. > > while holding the object locks. > , previously it would have dropped the locks for the wait. > > And maybe explain why this change is necessary? > We could drop the lock but it would make the code way more complicated and IMO simpler code far out weighs the potential benefit of dropping the lock. The dropping of the lock probably was a premature optimization that landed in the code without any data backing it up that it helped in any meaningful way. I can add a comment stating this. > > > > > IGT: https://patchwork.freedesktop.org/patch/447008/?series=93071=1 > > media UMD: https://github.com/intel/media-driver/pull/1252 > > > > v2: > > (Matthew Brost) > >- Return proper error value if i915_request_create fails > > v3: > > (John Harrison) > >- Add comment explaining create / add order loops + locking > >- Update commit message explaining different in IOCTL behavior > >- Line wrap some comments > >- eb_add_request returns void > >- Return -EINVAL rather triggering BUG_ON if cmd parser used > > (Checkpatch) > >- Check eb->batch_len[*current_batch] > > > > Signed-off-by: Matthew Brost > > --- > > .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 793 -- > > drivers/gpu/drm/i915/gt/intel_context.h | 8 +- > > drivers/gpu/drm/i915/gt/intel_context_types.h | 10 + > > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 + > > drivers/gpu/drm/i915/i915_request.h | 9 + > > drivers/gpu/drm/i915/i915_vma.c | 21 +- > > drivers/gpu/drm/i915/i915_vma.h | 13 +- > > 7 files changed, 599 insertions(+), 257 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > > b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > > index 2f2434b52317..5c7fb6f68bbb 100644 > > --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > > @@ -244,17 +244,25 @@ struct i915_execbuffer { > > struct drm_i915_gem_exec_object2 *exec; /** ioctl execobj[] */ > > struct eb_vma *vma; > > - struct intel_engine_cs *engine; /** engine to queue the request to */ > > + struct intel_gt *gt; /* gt for the execbuf */ > > struct intel_context *context; /* logical state for the request */ > > struct i915_gem_context *gem_context; /** caller's context */ > > - struct i915_request *request; /** our request to build */ > > - struct eb_vma *batch; /** identity of the batch obj/vma */ > > + /** our requests to build */ > > + struct i915_request *requests[MAX_ENGINE_INSTANCE + 1]; > > + /** identity of the batch obj/vma */ > > + struct eb_vma *batches[MAX_ENGINE_INSTANCE + 1]; > > struct i915_vma *trampoline; /** trampoline used for chaining */ > > + /** used for excl fence in dma_resv objects when > 1 BB submitted */ > > + struct dma_fence *composite_fence; > > + > > /** actual size of execobj[] as we may extend it for the cmdparser */ > > unsigned int buffer_count; > > + /* number of batches in execbuf IOCTL */ > > + unsigned int num_batches; > > + > > /** list of vma not yet bound during reservation phase */ > > struct list_head unbound; > > @@ -281,7 +289,8 @@ struct i915_execbuffer { > > u64 invalid_flags; /** Set of execobj.flags that are invalid */ > > - u64 batch_len; /** Length of batch within object */ > > + /** Length of batch within object */ > > + u64 batch_len[MAX_ENGINE_INSTANCE + 1]; > > u32 batch_start_offset; /** Location within object of batch */ > > u32 batch_flags; /** Flags composed for emit_bb_start() */ > > struct intel_gt_buffer_pool_node *batch_pool; /** pool node for batch > > buffer */ > > @@ -299,14 +308,13 @@ struct i915_execbuffer { > > }; > > static int eb_parse(struct i915_execbuffer *eb); > > -static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb, > > -
Re: [Intel-gfx] [PATCH 21/26] drm/i915: Multi-BB execbuf
On 10/4/2021 15:06, Matthew Brost wrote: Allow multiple batch buffers to be submitted in a single execbuf IOCTL after a context has been configured with the 'set_parallel' extension. The number batches is implicit based on the contexts configuration. This is implemented with a series of loops. First a loop is used to find all the batches, a loop to pin all the HW contexts, a loop to create all the requests, a loop to submit (emit BB start, etc...) all the requests, a loop to tie the requests to the VMAs they touch, and finally a loop to commit the requests to the backend. A composite fence is also created for the generated requests to return to the user and to stick in dma resv slots. No behavior from the existing IOCTL should be changed aside from when throttling because the ring for a context is full, wait on the request throttling because the ring for -> throttling the ring because full, wait -> full. In this situation, i915 will now wait while holding the object locks. , previously it would have dropped the locks for the wait. And maybe explain why this change is necessary? IGT: https://patchwork.freedesktop.org/patch/447008/?series=93071=1 media UMD: https://github.com/intel/media-driver/pull/1252 v2: (Matthew Brost) - Return proper error value if i915_request_create fails v3: (John Harrison) - Add comment explaining create / add order loops + locking - Update commit message explaining different in IOCTL behavior - Line wrap some comments - eb_add_request returns void - Return -EINVAL rather triggering BUG_ON if cmd parser used (Checkpatch) - Check eb->batch_len[*current_batch] Signed-off-by: Matthew Brost --- .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 793 -- drivers/gpu/drm/i915/gt/intel_context.h | 8 +- drivers/gpu/drm/i915/gt/intel_context_types.h | 10 + .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 + drivers/gpu/drm/i915/i915_request.h | 9 + drivers/gpu/drm/i915/i915_vma.c | 21 +- drivers/gpu/drm/i915/i915_vma.h | 13 +- 7 files changed, 599 insertions(+), 257 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 2f2434b52317..5c7fb6f68bbb 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -244,17 +244,25 @@ struct i915_execbuffer { struct drm_i915_gem_exec_object2 *exec; /** ioctl execobj[] */ struct eb_vma *vma; - struct intel_engine_cs *engine; /** engine to queue the request to */ + struct intel_gt *gt; /* gt for the execbuf */ struct intel_context *context; /* logical state for the request */ struct i915_gem_context *gem_context; /** caller's context */ - struct i915_request *request; /** our request to build */ - struct eb_vma *batch; /** identity of the batch obj/vma */ + /** our requests to build */ + struct i915_request *requests[MAX_ENGINE_INSTANCE + 1]; + /** identity of the batch obj/vma */ + struct eb_vma *batches[MAX_ENGINE_INSTANCE + 1]; struct i915_vma *trampoline; /** trampoline used for chaining */ + /** used for excl fence in dma_resv objects when > 1 BB submitted */ + struct dma_fence *composite_fence; + /** actual size of execobj[] as we may extend it for the cmdparser */ unsigned int buffer_count; + /* number of batches in execbuf IOCTL */ + unsigned int num_batches; + /** list of vma not yet bound during reservation phase */ struct list_head unbound; @@ -281,7 +289,8 @@ struct i915_execbuffer { u64 invalid_flags; /** Set of execobj.flags that are invalid */ - u64 batch_len; /** Length of batch within object */ + /** Length of batch within object */ + u64 batch_len[MAX_ENGINE_INSTANCE + 1]; u32 batch_start_offset; /** Location within object of batch */ u32 batch_flags; /** Flags composed for emit_bb_start() */ struct intel_gt_buffer_pool_node *batch_pool; /** pool node for batch buffer */ @@ -299,14 +308,13 @@ struct i915_execbuffer { }; static int eb_parse(struct i915_execbuffer *eb); -static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb, - bool throttle); +static int eb_pin_engine(struct i915_execbuffer *eb, bool throttle); static void eb_unpin_engine(struct i915_execbuffer *eb); static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb) { - return intel_engine_requires_cmd_parser(eb->engine) || - (intel_engine_using_cmd_parser(eb->engine) && + return intel_engine_requires_cmd_parser(eb->context->engine) || + (intel_engine_using_cmd_parser(eb->context->engine) && eb->args->batch_len); } @@ -544,11 +552,21 @@ eb_validate_vma(struct i915_execbuffer *eb, return 0;
Re: [Intel-gfx] [PATCH 21/26] drm/i915: Multi-BB execbuf
On Mon, Oct 04, 2021 at 03:06:32PM -0700, Matthew Brost wrote: > Allow multiple batch buffers to be submitted in a single execbuf IOCTL > after a context has been configured with the 'set_parallel' extension. > The number batches is implicit based on the contexts configuration. > > This is implemented with a series of loops. First a loop is used to find > all the batches, a loop to pin all the HW contexts, a loop to create all > the requests, a loop to submit (emit BB start, etc...) all the requests, > a loop to tie the requests to the VMAs they touch, and finally a loop to > commit the requests to the backend. > > A composite fence is also created for the generated requests to return > to the user and to stick in dma resv slots. > > No behavior from the existing IOCTL should be changed aside from when > throttling because the ring for a context is full, wait on the request > while holding the object locks. > > IGT: https://patchwork.freedesktop.org/patch/447008/?series=93071=1 > media UMD: https://github.com/intel/media-driver/pull/1252 > > v2: > (Matthew Brost) > - Return proper error value if i915_request_create fails > v3: > (John Harrison) > - Add comment explaining create / add order loops + locking > - Update commit message explaining different in IOCTL behavior > - Line wrap some comments > - eb_add_request returns void > - Return -EINVAL rather triggering BUG_ON if cmd parser used > (Checkpatch) > - Check eb->batch_len[*current_batch] > > Signed-off-by: Matthew Brost > --- > .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 793 -- > drivers/gpu/drm/i915/gt/intel_context.h | 8 +- > drivers/gpu/drm/i915/gt/intel_context_types.h | 10 + > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 + > drivers/gpu/drm/i915/i915_request.h | 9 + > drivers/gpu/drm/i915/i915_vma.c | 21 +- > drivers/gpu/drm/i915/i915_vma.h | 13 +- > 7 files changed, 599 insertions(+), 257 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > index 2f2434b52317..5c7fb6f68bbb 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > @@ -244,17 +244,25 @@ struct i915_execbuffer { > struct drm_i915_gem_exec_object2 *exec; /** ioctl execobj[] */ > struct eb_vma *vma; > > - struct intel_engine_cs *engine; /** engine to queue the request to */ > + struct intel_gt *gt; /* gt for the execbuf */ > struct intel_context *context; /* logical state for the request */ > struct i915_gem_context *gem_context; /** caller's context */ > > - struct i915_request *request; /** our request to build */ > - struct eb_vma *batch; /** identity of the batch obj/vma */ > + /** our requests to build */ > + struct i915_request *requests[MAX_ENGINE_INSTANCE + 1]; > + /** identity of the batch obj/vma */ > + struct eb_vma *batches[MAX_ENGINE_INSTANCE + 1]; > struct i915_vma *trampoline; /** trampoline used for chaining */ > > + /** used for excl fence in dma_resv objects when > 1 BB submitted */ > + struct dma_fence *composite_fence; > + > /** actual size of execobj[] as we may extend it for the cmdparser */ > unsigned int buffer_count; > > + /* number of batches in execbuf IOCTL */ > + unsigned int num_batches; > + > /** list of vma not yet bound during reservation phase */ > struct list_head unbound; > > @@ -281,7 +289,8 @@ struct i915_execbuffer { > > u64 invalid_flags; /** Set of execobj.flags that are invalid */ > > - u64 batch_len; /** Length of batch within object */ > + /** Length of batch within object */ > + u64 batch_len[MAX_ENGINE_INSTANCE + 1]; > u32 batch_start_offset; /** Location within object of batch */ > u32 batch_flags; /** Flags composed for emit_bb_start() */ > struct intel_gt_buffer_pool_node *batch_pool; /** pool node for batch > buffer */ > @@ -299,14 +308,13 @@ struct i915_execbuffer { > }; > > static int eb_parse(struct i915_execbuffer *eb); > -static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb, > - bool throttle); > +static int eb_pin_engine(struct i915_execbuffer *eb, bool throttle); > static void eb_unpin_engine(struct i915_execbuffer *eb); > > static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb) > { > - return intel_engine_requires_cmd_parser(eb->engine) || > - (intel_engine_using_cmd_parser(eb->engine) && > + return intel_engine_requires_cmd_parser(eb->context->engine) || > + (intel_engine_using_cmd_parser(eb->context->engine) && >eb->args->batch_len); > } > > @@ -544,11 +552,21 @@ eb_validate_vma(struct i915_execbuffer *eb, > return 0; > } > > -static void > +static inline bool > +is_batch_buffer(struct
Re: [Intel-gfx] [PATCH 21/26] drm/i915: Multi-BB execbuf
On Mon, Oct 04, 2021 at 03:06:32PM -0700, Matthew Brost wrote: > Allow multiple batch buffers to be submitted in a single execbuf IOCTL > after a context has been configured with the 'set_parallel' extension. > The number batches is implicit based on the contexts configuration. > > This is implemented with a series of loops. First a loop is used to find > all the batches, a loop to pin all the HW contexts, a loop to create all > the requests, a loop to submit (emit BB start, etc...) all the requests, > a loop to tie the requests to the VMAs they touch, and finally a loop to > commit the requests to the backend. > > A composite fence is also created for the generated requests to return > to the user and to stick in dma resv slots. > > No behavior from the existing IOCTL should be changed aside from when > throttling because the ring for a context is full, wait on the request > while holding the object locks. > > IGT: https://patchwork.freedesktop.org/patch/447008/?series=93071=1 > media UMD: https://github.com/intel/media-driver/pull/1252 > > v2: > (Matthew Brost) > - Return proper error value if i915_request_create fails > v3: > (John Harrison) > - Add comment explaining create / add order loops + locking > - Update commit message explaining different in IOCTL behavior > - Line wrap some comments > - eb_add_request returns void > - Return -EINVAL rather triggering BUG_ON if cmd parser used > (Checkpatch) > - Check eb->batch_len[*current_batch] > > Signed-off-by: Matthew Brost > --- > .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 793 -- > drivers/gpu/drm/i915/gt/intel_context.h | 8 +- > drivers/gpu/drm/i915/gt/intel_context_types.h | 10 + > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 + > drivers/gpu/drm/i915/i915_request.h | 9 + > drivers/gpu/drm/i915/i915_vma.c | 21 +- > drivers/gpu/drm/i915/i915_vma.h | 13 +- > 7 files changed, 599 insertions(+), 257 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > index 2f2434b52317..5c7fb6f68bbb 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > @@ -244,17 +244,25 @@ struct i915_execbuffer { > struct drm_i915_gem_exec_object2 *exec; /** ioctl execobj[] */ > struct eb_vma *vma; > > - struct intel_engine_cs *engine; /** engine to queue the request to */ > + struct intel_gt *gt; /* gt for the execbuf */ > struct intel_context *context; /* logical state for the request */ > struct i915_gem_context *gem_context; /** caller's context */ > > - struct i915_request *request; /** our request to build */ > - struct eb_vma *batch; /** identity of the batch obj/vma */ > + /** our requests to build */ > + struct i915_request *requests[MAX_ENGINE_INSTANCE + 1]; > + /** identity of the batch obj/vma */ > + struct eb_vma *batches[MAX_ENGINE_INSTANCE + 1]; > struct i915_vma *trampoline; /** trampoline used for chaining */ > > + /** used for excl fence in dma_resv objects when > 1 BB submitted */ > + struct dma_fence *composite_fence; > + > /** actual size of execobj[] as we may extend it for the cmdparser */ > unsigned int buffer_count; > > + /* number of batches in execbuf IOCTL */ > + unsigned int num_batches; > + > /** list of vma not yet bound during reservation phase */ > struct list_head unbound; > > @@ -281,7 +289,8 @@ struct i915_execbuffer { > > u64 invalid_flags; /** Set of execobj.flags that are invalid */ > > - u64 batch_len; /** Length of batch within object */ > + /** Length of batch within object */ > + u64 batch_len[MAX_ENGINE_INSTANCE + 1]; > u32 batch_start_offset; /** Location within object of batch */ > u32 batch_flags; /** Flags composed for emit_bb_start() */ > struct intel_gt_buffer_pool_node *batch_pool; /** pool node for batch > buffer */ > @@ -299,14 +308,13 @@ struct i915_execbuffer { > }; > > static int eb_parse(struct i915_execbuffer *eb); > -static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb, > - bool throttle); > +static int eb_pin_engine(struct i915_execbuffer *eb, bool throttle); > static void eb_unpin_engine(struct i915_execbuffer *eb); > > static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb) > { > - return intel_engine_requires_cmd_parser(eb->engine) || > - (intel_engine_using_cmd_parser(eb->engine) && > + return intel_engine_requires_cmd_parser(eb->context->engine) || > + (intel_engine_using_cmd_parser(eb->context->engine) && >eb->args->batch_len); > } > > @@ -544,11 +552,21 @@ eb_validate_vma(struct i915_execbuffer *eb, > return 0; > } > > -static void > +static inline bool > +is_batch_buffer(struct
Re: [Intel-gfx] [PATCH 21/26] drm/i915: Multi-BB execbuf
Hi Matthew, Thank you for the patch! Perhaps something to improve: [auto build test WARNING on drm-tip/drm-tip] [cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next tegra-drm/drm/tegra/for-next linus/master airlied/drm-next v5.15-rc3 next-20210922] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Matthew-Brost/Parallel-submission-aka-multi-bb-execbuf/20211005-061424 base: git://anongit.freedesktop.org/drm/drm-tip drm-tip config: i386-randconfig-a004-20211004 (attached as .config) compiler: clang version 14.0.0 (https://github.com/llvm/llvm-project c0039de2953d15815448b4b3c3bafb45607781e0) reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # https://github.com/0day-ci/linux/commit/758202922dad66c1b302eb34a141961acbefe417 git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Matthew-Brost/Parallel-submission-aka-multi-bb-execbuf/20211005-061424 git checkout 758202922dad66c1b302eb34a141961acbefe417 # save the attached .config to linux build tree COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 ARCH=i386 If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot All warnings (new ones prefixed by >>): >> drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c:2361:6: warning: variable >> 'rq' is used uninitialized whenever 'if' condition is false >> [-Wsometimes-uninitialized] if (throttle) ^~~~ drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c:2365:6: note: uninitialized use occurs here if (rq) { ^~ drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c:2361:2: note: remove the 'if' if its condition is always true if (throttle) ^ drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c:2346:25: note: initialize the variable 'rq' to silence this warning struct i915_request *rq; ^ = NULL 1 warning generated. vim +2361 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c e5dadff4b09376 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c Chris Wilson 2019-08-15 2341 758202922dad66 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c Matthew Brost 2021-10-04 2342 static int eb_pin_timeline(struct i915_execbuffer *eb, struct intel_context *ce, 758202922dad66 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c Matthew Brost 2021-10-04 2343 bool throttle) 8f2a1057d6ec21 drivers/gpu/drm/i915/i915_gem_execbuffer.c Chris Wilson 2019-04-25 2344 { e5dadff4b09376 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c Chris Wilson 2019-08-15 2345struct intel_timeline *tl; 758202922dad66 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c Matthew Brost 2021-10-04 2346struct i915_request *rq; 8f2a1057d6ec21 drivers/gpu/drm/i915/i915_gem_execbuffer.c Chris Wilson 2019-04-25 2347 a4e57f9031ccd5 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c Chris Wilson 2019-08-04 2348/* a4e57f9031ccd5 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c Chris Wilson 2019-08-04 2349 * Take a local wakeref for preparing to dispatch the execbuf as a4e57f9031ccd5 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c Chris Wilson 2019-08-04 2350 * we expect to access the hardware fairly frequently in the a4e57f9031ccd5 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c Chris Wilson 2019-08-04 2351 * process, and require the engine to be kept awake between accesses. a4e57f9031ccd5 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c Chris Wilson 2019-08-04 2352 * Upon dispatch, we acquire another prolonged wakeref that we hold a4e57f9031ccd5 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c Chris Wilson 2019-08-04 2353 * until the timeline is idle, which in turn releases the wakeref a4e57f9031ccd5 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c Chris Wilson 2019-08-04 2354 * taken on the engine, and the parent device. a4e57f9031ccd5 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c Chris Wilson 2019-08-04 2355 */ e5dadff4b09376 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c Chris Wilson 2019-08-15 2356tl = intel_context_timeline_lock(ce); 758202922dad66 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c Matthew Brost 2021-10-04 2357if (IS_ERR(tl)) 758202922dad66 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c Matthew Brost 2021-10-04 2358return PTR_ERR(tl); a4e57f9031ccd5
[Intel-gfx] [PATCH 21/26] drm/i915: Multi-BB execbuf
Allow multiple batch buffers to be submitted in a single execbuf IOCTL after a context has been configured with the 'set_parallel' extension. The number batches is implicit based on the contexts configuration. This is implemented with a series of loops. First a loop is used to find all the batches, a loop to pin all the HW contexts, a loop to create all the requests, a loop to submit (emit BB start, etc...) all the requests, a loop to tie the requests to the VMAs they touch, and finally a loop to commit the requests to the backend. A composite fence is also created for the generated requests to return to the user and to stick in dma resv slots. No behavior from the existing IOCTL should be changed aside from when throttling because the ring for a context is full, wait on the request while holding the object locks. IGT: https://patchwork.freedesktop.org/patch/447008/?series=93071=1 media UMD: https://github.com/intel/media-driver/pull/1252 v2: (Matthew Brost) - Return proper error value if i915_request_create fails v3: (John Harrison) - Add comment explaining create / add order loops + locking - Update commit message explaining different in IOCTL behavior - Line wrap some comments - eb_add_request returns void - Return -EINVAL rather triggering BUG_ON if cmd parser used (Checkpatch) - Check eb->batch_len[*current_batch] Signed-off-by: Matthew Brost --- .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 793 -- drivers/gpu/drm/i915/gt/intel_context.h | 8 +- drivers/gpu/drm/i915/gt/intel_context_types.h | 10 + .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 + drivers/gpu/drm/i915/i915_request.h | 9 + drivers/gpu/drm/i915/i915_vma.c | 21 +- drivers/gpu/drm/i915/i915_vma.h | 13 +- 7 files changed, 599 insertions(+), 257 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 2f2434b52317..5c7fb6f68bbb 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -244,17 +244,25 @@ struct i915_execbuffer { struct drm_i915_gem_exec_object2 *exec; /** ioctl execobj[] */ struct eb_vma *vma; - struct intel_engine_cs *engine; /** engine to queue the request to */ + struct intel_gt *gt; /* gt for the execbuf */ struct intel_context *context; /* logical state for the request */ struct i915_gem_context *gem_context; /** caller's context */ - struct i915_request *request; /** our request to build */ - struct eb_vma *batch; /** identity of the batch obj/vma */ + /** our requests to build */ + struct i915_request *requests[MAX_ENGINE_INSTANCE + 1]; + /** identity of the batch obj/vma */ + struct eb_vma *batches[MAX_ENGINE_INSTANCE + 1]; struct i915_vma *trampoline; /** trampoline used for chaining */ + /** used for excl fence in dma_resv objects when > 1 BB submitted */ + struct dma_fence *composite_fence; + /** actual size of execobj[] as we may extend it for the cmdparser */ unsigned int buffer_count; + /* number of batches in execbuf IOCTL */ + unsigned int num_batches; + /** list of vma not yet bound during reservation phase */ struct list_head unbound; @@ -281,7 +289,8 @@ struct i915_execbuffer { u64 invalid_flags; /** Set of execobj.flags that are invalid */ - u64 batch_len; /** Length of batch within object */ + /** Length of batch within object */ + u64 batch_len[MAX_ENGINE_INSTANCE + 1]; u32 batch_start_offset; /** Location within object of batch */ u32 batch_flags; /** Flags composed for emit_bb_start() */ struct intel_gt_buffer_pool_node *batch_pool; /** pool node for batch buffer */ @@ -299,14 +308,13 @@ struct i915_execbuffer { }; static int eb_parse(struct i915_execbuffer *eb); -static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb, - bool throttle); +static int eb_pin_engine(struct i915_execbuffer *eb, bool throttle); static void eb_unpin_engine(struct i915_execbuffer *eb); static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb) { - return intel_engine_requires_cmd_parser(eb->engine) || - (intel_engine_using_cmd_parser(eb->engine) && + return intel_engine_requires_cmd_parser(eb->context->engine) || + (intel_engine_using_cmd_parser(eb->context->engine) && eb->args->batch_len); } @@ -544,11 +552,21 @@ eb_validate_vma(struct i915_execbuffer *eb, return 0; } -static void +static inline bool +is_batch_buffer(struct i915_execbuffer *eb, unsigned int buffer_idx) +{ + return eb->args->flags & I915_EXEC_BATCH_FIRST ? + buffer_idx < eb->num_batches : + buffer_idx >= eb->args->buffer_count - eb->num_batches; +}