Passing the index of the target buffer via the reloc.target_handle is marginally more efficient for the kernel (it can avoid some allocations, and can use a direct lookup rather than a hash or search). It is also useful for ourselves as we can use the index into our exec_bos for other tasks.
v2: Only enable HANDLE_LUT if we can use BATCH_FIRST and thereby avoid a post-processing loop to fixup the relocations. v3: Move kernel probing from context creation to screen init. Use batch->use_exec_lut as it more descriptive of what's going on (Daniel) Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> Cc: Kenneth Graunke <kenn...@whitecape.org> Cc: Matt Turner <matts...@gmail.com> Cc: Jason Ekstrand <jason.ekstr...@intel.com> Cc: Daniel Vetter <daniel.vet...@ffwll.ch> --- src/mesa/drivers/dri/i965/brw_context.h | 1 + src/mesa/drivers/dri/i965/intel_batchbuffer.c | 60 +++++++++++++++++---------- src/mesa/drivers/dri/i965/intel_screen.c | 20 +++++++++ src/mesa/drivers/dri/i965/intel_screen.h | 5 +++ 4 files changed, 64 insertions(+), 22 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index ffe4792b73..62ce5e472c 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -452,6 +452,7 @@ struct intel_batchbuffer { uint32_t state_batch_offset; enum brw_gpu_ring ring; + bool use_exec_lut; bool needs_sol_reset; bool state_base_address_emitted; diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c index 065a9c1c0c..5f9639cd4d 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c @@ -62,8 +62,6 @@ intel_batchbuffer_init(struct intel_batchbuffer *batch, struct brw_bufmgr *bufmgr, bool has_llc) { - intel_batchbuffer_reset(batch, bufmgr, has_llc); - if (!has_llc) { batch->cpu_map = malloc(BATCH_SZ); batch->map = batch->cpu_map; @@ -85,6 +83,16 @@ intel_batchbuffer_init(struct intel_batchbuffer *batch, batch->state_batch_sizes = _mesa_hash_table_create(NULL, uint_key_hash, uint_key_compare); } + + struct brw_context *brw = container_of(batch, brw, batch); + /* To use the LUT method for execbuf, we also require placing the batch + * first (to simplify our implementation). We require a kernel recent + * enough to always support EXEC_LUT_HANDLE, but we must check that + * the kernel supports EXEC_BATCH_FIRST. + */ + batch->use_exec_lut = brw->screen->kerninfo.has_exec_batch_first; + + intel_batchbuffer_reset(batch, bufmgr, has_llc); } #define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x)) @@ -117,21 +125,12 @@ add_exec_bo(struct intel_batchbuffer *batch, struct brw_bo *bo) batch->exec_array_size * sizeof(batch->exec_objects[0])); } - struct drm_i915_gem_exec_object2 *validation_entry = - &batch->exec_objects[batch->exec_count]; - validation_entry->handle = bo->gem_handle; - if (bo == batch->bo) { - validation_entry->relocation_count = batch->reloc_count; - validation_entry->relocs_ptr = (uintptr_t) batch->relocs; - } else { - validation_entry->relocation_count = 0; - validation_entry->relocs_ptr = 0; - } - validation_entry->alignment = bo->align; - validation_entry->offset = bo->offset64; - validation_entry->flags = bo->kflags; - validation_entry->rsvd1 = 0; - validation_entry->rsvd2 = 0; + struct drm_i915_gem_exec_object2 *exec = + memset(&batch->exec_objects[batch->exec_count], 0, sizeof(*exec)); + exec->handle = bo->gem_handle; + exec->alignment = bo->align; + exec->offset = bo->offset64; + exec->flags = bo->kflags; bo->index = batch->exec_count; batch->exec_bos[batch->exec_count] = bo; @@ -157,6 +156,11 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch, } batch->map_next = batch->map; + if (batch->use_exec_lut) { + add_exec_bo(batch, batch->bo); + assert(batch->bo->index == 0); + } + batch->reserved_space = BATCH_RESERVED; batch->state_batch_offset = batch->bo->size; batch->needs_sol_reset = false; @@ -663,15 +667,25 @@ do_flush_locked(struct brw_context *brw, int in_fence_fd, int *out_fence_fd) } else { flags |= I915_EXEC_RENDER; } + if (batch->needs_sol_reset) flags |= I915_EXEC_GEN7_SOL_RESET; + unsigned int index; + if (batch->use_exec_lut) { + flags |= I915_EXEC_BATCH_FIRST | I915_EXEC_HANDLE_LUT; + index = 0; + } else { + index = add_exec_bo(batch, batch->bo); + } + + struct drm_i915_gem_exec_object2 *exec = &batch->exec_objects[index]; + exec->relocation_count = batch->reloc_count; + exec->relocs_ptr = (uintptr_t) batch->relocs; + if (ret == 0) { uint32_t hw_ctx = batch->ring == RENDER_RING ? brw->hw_ctx : 0; - /* Add the batch itself to the end of the validation list */ - add_exec_bo(batch, batch->bo); - ret = execbuffer(dri_screen->fd, batch, hw_ctx, 4 * USED_BATCH(*batch), in_fence_fd, out_fence_fd, flags); @@ -798,8 +812,9 @@ __brw_emit_reloc(struct intel_batchbuffer *batch, uint32_t batch_offset, assert(_mesa_bitcount(write_domain) <= 1); uint64_t offset64; + unsigned int index; if (target != batch->bo) { - unsigned int index = add_exec_bo(batch, target); + index = add_exec_bo(batch, target); struct drm_i915_gem_exec_object2 *exec = &batch->exec_objects[index]; if (write_domain) { @@ -816,6 +831,7 @@ __brw_emit_reloc(struct intel_batchbuffer *batch, uint32_t batch_offset, offset64 = exec->offset; } else { offset64 = target->offset64; + index = target->index; } struct drm_i915_gem_relocation_entry *reloc = @@ -825,7 +841,7 @@ __brw_emit_reloc(struct intel_batchbuffer *batch, uint32_t batch_offset, reloc->offset = batch_offset; reloc->delta = target_offset; - reloc->target_handle = target->gem_handle; + reloc->target_handle = batch->use_exec_lut ? index : target->gem_handle; reloc->read_domains = read_domains; reloc->write_domain = write_domain; reloc->presumed_offset = offset64; diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index 7ccc1d34f0..99804c8329 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -51,6 +51,8 @@ #define DRM_FORMAT_MOD_LINEAR 0 #endif +#define DBG_NO_EXEC_BATCH_FIRST 0 + static const __DRIconfigOptionsExtension brw_config_options = { .base = { __DRI_CONFIG_OPTIONS, 1 }, .xml = @@ -1466,6 +1468,22 @@ intelDestroyBuffer(__DRIdrawable * driDrawPriv) _mesa_reference_framebuffer(&fb, NULL); } +static bool +test_has_exec_batch_first(struct intel_screen *screen) +{ + if (DBG_NO_EXEC_BATCH_FIRST) + return DBG_NO_EXEC_BATCH_FIRST < 0; + + return intel_get_integer(screen, I915_PARAM_HAS_EXEC_BATCH_FIRST) > 0; +} + +static void +intel_get_kernel_info(struct intel_screen *screen, + struct intel_kernel_info *kerninfo) +{ + kerninfo->has_exec_batch_first = test_has_exec_batch_first(screen); +} + static void intel_detect_sseu(struct intel_screen *screen) { @@ -2097,6 +2115,8 @@ __DRIconfig **intelInitScreen2(__DRIscreen *dri_screen) if (!gen_get_device_info(screen->deviceID, &screen->devinfo)) return NULL; + intel_get_kernel_info(screen, &screen->kerninfo); + if (!intel_init_bufmgr(screen)) return NULL; diff --git a/src/mesa/drivers/dri/i965/intel_screen.h b/src/mesa/drivers/dri/i965/intel_screen.h index 0980c8f561..d88703b1e6 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.h +++ b/src/mesa/drivers/dri/i965/intel_screen.h @@ -44,10 +44,15 @@ extern "C" { #endif +struct intel_kernel_info { + bool has_exec_batch_first; +}; + struct intel_screen { int deviceID; struct gen_device_info devinfo; + struct intel_kernel_info kerninfo; __DRIscreen *driScrnPriv; -- 2.13.3 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev