On Mon, Jun 19, 2017 at 11:06:50AM +0100, Chris Wilson wrote: > Passing the index of the target buffer via the reloc.target_handle is > marginally more efficient for the kernel (it can avoid some allocations, > and can use a direct lookup rather than a hash or search). It is also > useful for ourselves as we can use the index into our exec_bos for other > tasks. > > v2: Only enable HANDLE_LUT if we can use BATCH_FIRST and thereby avoid > a post-processing loop to fixup the relocations. > > Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> > Cc: Kenneth Graunke <kenn...@whitecape.org> > Cc: Matt Turner <matts...@gmail.com> > Cc: Jason Ekstrand <jason.ekstr...@intel.com> > --- > src/mesa/drivers/dri/i965/brw_context.h | 1 + > src/mesa/drivers/dri/i965/intel_batchbuffer.c | 82 > ++++++++++++++++++++------- > 2 files changed, 61 insertions(+), 22 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_context.h > b/src/mesa/drivers/dri/i965/brw_context.h > index 2fb2cab918..93ddd0825a 100644 > --- a/src/mesa/drivers/dri/i965/brw_context.h > +++ b/src/mesa/drivers/dri/i965/brw_context.h > @@ -450,6 +450,7 @@ struct intel_batchbuffer { > > uint32_t state_batch_offset; > enum brw_gpu_ring ring; > + bool has_batch_first; > bool needs_sol_reset; > bool state_base_address_emitted; > > diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c > b/src/mesa/drivers/dri/i965/intel_batchbuffer.c > index 15aaf01e52..5fa849c5a5 100644 > --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c > +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c > @@ -40,6 +40,10 @@ > > #define FILE_DEBUG_FLAG DEBUG_BUFMGR > > +#define DBG_NO_BATCH_FIRST 0 > +#define I915_PARAM_HAS_EXEC_BATCH_FIRST 48 > +#define I915_EXEC_BATCH_FIRST (1 << 18)
Needs an #ifndef I think, to avoid troubles when updating libdrm. Or just properly update mesa's copy of i915_drm.h, that would be much better. > + > static void > intel_batchbuffer_reset(struct intel_batchbuffer *batch, > struct brw_bufmgr *bufmgr, > @@ -57,13 +61,33 @@ uint_key_hash(const void *key) > return (uintptr_t) key; > } > > +static int gem_param(int fd, int name) > +{ > + drm_i915_getparam_t gp; > + int v = -1; /* No param uses (yet) the sign bit, reserve it for errors */ > + > + memset(&gp, 0, sizeof(gp)); > + gp.param = name; > + gp.value = &v; > + if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp)) > + return -1; > + > + return v; > +} Afaict this exists as intel_get_param already, why can't we use that? > + > +static bool test_has_batch_first(int fd) > +{ > + if (DBG_NO_BATCH_FIRST) > + return DBG_NO_BATCH_FIRST < 0; > + > + return gem_param(fd, I915_PARAM_HAS_EXEC_BATCH_FIRST) > 0; > +} > + > void > intel_batchbuffer_init(struct intel_batchbuffer *batch, > struct brw_bufmgr *bufmgr, > bool has_llc) > { > - intel_batchbuffer_reset(batch, bufmgr, has_llc); > - > if (!has_llc) { > batch->cpu_map = malloc(BATCH_SZ); > batch->map = batch->cpu_map; > @@ -85,6 +109,12 @@ intel_batchbuffer_init(struct intel_batchbuffer *batch, > batch->state_batch_sizes = > _mesa_hash_table_create(NULL, uint_key_hash, uint_key_compare); > } > + > + struct brw_context *brw = container_of(batch, brw, batch); > + __DRIscreen *dri_screen = brw->screen->driScrnPriv; > + batch->has_batch_first = test_has_batch_first(dri_screen->fd); > + > + intel_batchbuffer_reset(batch, bufmgr, has_llc); > } > > #define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x)) > @@ -117,21 +147,12 @@ add_exec_bo(struct intel_batchbuffer *batch, struct > brw_bo *bo) > batch->exec_array_size * sizeof(batch->exec_objects[0])); > } > > - struct drm_i915_gem_exec_object2 *validation_entry = > - &batch->exec_objects[batch->exec_count]; > - validation_entry->handle = bo->gem_handle; > - if (bo == batch->bo) { > - 
validation_entry->relocation_count = batch->reloc_count; > - validation_entry->relocs_ptr = (uintptr_t) batch->relocs; > - } else { > - validation_entry->relocation_count = 0; > - validation_entry->relocs_ptr = 0; > - } > - validation_entry->alignment = bo->align; > - validation_entry->offset = bo->offset64; > - validation_entry->flags = bo->kflags; > - validation_entry->rsvd1 = 0; > - validation_entry->rsvd2 = 0; > + struct drm_i915_gem_exec_object2 *exec = > + memset(&batch->exec_objects[batch->exec_count], 0, sizeof(*exec)); > + exec->handle = bo->gem_handle; > + exec->alignment = bo->align; > + exec->offset = bo->offset64; > + exec->flags = bo->kflags; > > bo->index = batch->exec_count; > batch->exec_bos[batch->exec_count] = bo; > @@ -157,6 +178,11 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch, > } > batch->map_next = batch->map; > > + if (batch->has_batch_first) { > + add_exec_bo(batch, batch->bo); > + assert(batch->bo->index == 0); > + } > + > batch->reserved_space = BATCH_RESERVED; > batch->state_batch_offset = batch->bo->size; > batch->needs_sol_reset = false; > @@ -663,15 +689,25 @@ do_flush_locked(struct brw_context *brw, int > in_fence_fd, int *out_fence_fd) > } else { > flags |= I915_EXEC_RENDER; > } > + > if (batch->needs_sol_reset) > flags |= I915_EXEC_GEN7_SOL_RESET; > > + unsigned int index; > + if (batch->has_batch_first) { > + flags |= I915_EXEC_BATCH_FIRST | I915_EXEC_HANDLE_LUT; > + index = 0; > + } else { > + index = add_exec_bo(batch, batch->bo); > + } > + > + struct drm_i915_gem_exec_object2 *exec = &batch->exec_objects[index]; > + exec->relocation_count = batch->reloc_count; > + exec->relocs_ptr = (uintptr_t) batch->relocs; > + > if (ret == 0) { > uint32_t hw_ctx = batch->ring == RENDER_RING ? 
brw->hw_ctx : 0; > > - /* Add the batch itself to the end of the validation list */ > - add_exec_bo(batch, batch->bo); > - > ret = execbuffer(dri_screen->fd, batch, hw_ctx, > 4 * USED_BATCH(*batch), > in_fence_fd, out_fence_fd, flags); > @@ -793,8 +829,9 @@ brw_emit_reloc(struct intel_batchbuffer *batch, uint32_t > batch_offset, > assert(batch_offset <= BATCH_SZ - sizeof(uint32_t)); > assert(_mesa_bitcount(write_domain) <= 1); > > + unsigned int index; > if (target != batch->bo) { > - unsigned int index = add_exec_bo(batch, target); > + index = add_exec_bo(batch, target); > struct drm_i915_gem_exec_object2 *exec = &batch->exec_objects[index]; > > if (write_domain) { > @@ -811,6 +848,7 @@ brw_emit_reloc(struct intel_batchbuffer *batch, uint32_t > batch_offset, > offset64 = exec->offset; > } else { > offset64 = target->offset64; > + index = target->index; index = 0; Yes the batch isn't ever shared, but I think it's better to make this obviously safe. With the nits addressed: Reviewed-by: Daniel Vetter <daniel.vet...@ffwll.ch> > } > > struct drm_i915_gem_relocation_entry *reloc = > @@ -820,7 +858,7 @@ brw_emit_reloc(struct intel_batchbuffer *batch, uint32_t > batch_offset, > > reloc->offset = batch_offset; > reloc->delta = target_offset; > - reloc->target_handle = target->gem_handle; > + reloc->target_handle = batch->has_batch_first ? index : > target->gem_handle; > reloc->read_domains = read_domains; > reloc->write_domain = write_domain; > reloc->presumed_offset = offset64; > -- > 2.11.0 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev