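For the common path where we want to execute the batch, pushing the no_hw detection down into execbuffer() lets us eliminate one loop over all the execobjects: the references held on the exec BOs are now dropped in the loop that already walks them after the execbuf ioctl, instead of in a separate pass in brw_new_batch(). For the less common path where we don't want to execute the batch (no_hw), do_flush_locked() skipped the submission entirely and so was leaving out_fence uninitialised.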
Cc: Kenneth Graunke <kenn...@whitecape.org> Cc: Matt Turner <matts...@gmail.com> --- src/mesa/drivers/dri/i965/intel_batchbuffer.c | 116 +++++++++++++------------- 1 file changed, 56 insertions(+), 60 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c index 7f5f240597..59b142cc75 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c @@ -438,13 +438,6 @@ static void do_batch_dump(struct brw_context *brw) { } static void brw_new_batch(struct brw_context *brw) { - /* Unreference any BOs held by the previous batch, and reset counts. */ - for (int i = 0; i < brw->batch.exec_count; i++) { - if (brw->batch.exec_bos[i] != brw->batch.bo) { - brw_bo_unreference(brw->batch.exec_bos[i]); - } - brw->batch.exec_bos[i] = NULL; - } brw->batch.reloc_count = 0; brw->batch.exec_count = 0; brw->batch.aperture_space = BATCH_SZ; @@ -571,21 +564,21 @@ throttle(struct brw_context *brw) } static int -execbuffer(int fd, - struct intel_batchbuffer *batch, - uint32_t ctx_id, - int used, +execbuffer(struct brw_context *brw, int in_fence, int *out_fence, int flags) { + struct intel_batchbuffer *batch = &brw->batch; struct drm_i915_gem_execbuffer2 execbuf = { .buffers_ptr = (uintptr_t) batch->exec_objects, .buffer_count = batch->exec_count, .batch_start_offset = 0, - .batch_len = used, + .batch_len = 4 * USED_BATCH(*batch), .flags = flags, - .rsvd1 = ctx_id, /* rsvd1 is actually the context ID */ + + /* rsvd1 is actually the context ID */ + .rsvd1 = batch->ring == RENDER_RING ? 
brw->hw_ctx : 0, }; unsigned long cmd = DRM_IOCTL_I915_GEM_EXECBUFFER2; @@ -601,9 +594,14 @@ execbuffer(int fd, execbuf.flags |= I915_EXEC_FENCE_OUT; } - int ret = drmIoctl(fd, cmd, &execbuf); - if (ret != 0) - ret = -errno; + int ret = 0; + if (likely(!brw->screen->no_hw)) { + __DRIscreen *dri_screen = brw->screen->driScrnPriv; + if (unlikely(drmIoctl(dri_screen->fd, cmd, &execbuf))) + ret = -errno; + } else { + out_fence = NULL; + } for (int i = 0; i < batch->exec_count; i++) { struct brw_bo *bo = batch->exec_bos[i]; @@ -617,6 +615,11 @@ execbuffer(int fd, bo->gem_handle, bo->offset64, batch->exec_objects[i].offset); bo->offset64 = batch->exec_objects[i].offset; } + + if (batch->exec_bos[i] != batch->bo) { + brw_bo_unreference(batch->exec_bos[i]); + } + batch->exec_bos[i] = NULL; } if (ret == 0 && out_fence != NULL) @@ -628,7 +631,6 @@ execbuffer(int fd, static int do_flush_locked(struct brw_context *brw, int in_fence_fd, int *out_fence_fd) { - __DRIscreen *dri_screen = brw->screen->driScrnPriv; struct intel_batchbuffer *batch = &brw->batch; int ret = 0; @@ -644,55 +646,49 @@ do_flush_locked(struct brw_context *brw, int in_fence_fd, int *out_fence_fd) } } - if (!brw->screen->no_hw) { - unsigned int flags; - - /* The requirement for using I915_EXEC_NO_RELOC are: - * - * The addresses written in the objects must match the corresponding - * reloc.presumed_offset which in turn must match the corresponding - * execobject.offset. - * - * Any render targets written to in the batch must be flagged with - * EXEC_OBJECT_WRITE. - * - * To avoid stalling, execobject.offset should match the current - * address of that object within the active context. 
- */ - flags = I915_EXEC_NO_RELOC; - if (brw->gen >= 6 && batch->ring == BLT_RING) { - flags |= I915_EXEC_BLT; - } else { - flags |= I915_EXEC_RENDER; - } + unsigned int flags; - if (batch->needs_sol_reset) - flags |= I915_EXEC_GEN7_SOL_RESET; - - struct drm_i915_gem_exec_object2 *exec = &batch->exec_objects[0]; - assert(exec->handle == batch->bo->gem_handle); - exec->relocation_count = batch->reloc_count; - exec->relocs_ptr = (uintptr_t) batch->relocs; - if (batch->use_exec_lut) { - flags |= I915_EXEC_BATCH_FIRST | I915_EXEC_HANDLE_LUT; - } else { - struct drm_i915_gem_exec_object2 tmp = *exec; - unsigned int index = batch->exec_count - 1; - *exec = batch->exec_objects[index]; - batch->exec_objects[index] = tmp; - } + /* The requirement for using I915_EXEC_NO_RELOC are: + * + * The addresses written in the objects must match the corresponding + * reloc.presumed_offset which in turn must match the corresponding + * execobject.offset. + * + * Any render targets written to in the batch must be flagged with + * EXEC_OBJECT_WRITE. + * + * To avoid stalling, execobject.offset should match the current + * address of that object within the active context. + */ + flags = I915_EXEC_NO_RELOC; + if (brw->gen >= 6 && batch->ring == BLT_RING) { + flags |= I915_EXEC_BLT; + } else { + flags |= I915_EXEC_RENDER; + } - if (ret == 0) { - uint32_t hw_ctx = batch->ring == RENDER_RING ? 
brw->hw_ctx : 0; + if (batch->needs_sol_reset) + flags |= I915_EXEC_GEN7_SOL_RESET; - ret = execbuffer(dri_screen->fd, batch, hw_ctx, - 4 * USED_BATCH(*batch), - in_fence_fd, out_fence_fd, flags); - } + struct drm_i915_gem_exec_object2 *exec = &batch->exec_objects[0]; + assert(exec->handle == batch->bo->gem_handle); + exec->relocation_count = batch->reloc_count; + exec->relocs_ptr = (uintptr_t) batch->relocs; + if (batch->use_exec_lut) { + flags |= I915_EXEC_BATCH_FIRST | I915_EXEC_HANDLE_LUT; + } else { + struct drm_i915_gem_exec_object2 tmp = *exec; + unsigned int index = batch->exec_count - 1; + *exec = batch->exec_objects[index]; + batch->exec_objects[index] = tmp; + } - throttle(brw); + if (ret == 0) { + ret = execbuffer(brw, in_fence_fd, out_fence_fd, flags); } + throttle(brw); + if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) do_batch_dump(brw); -- 2.13.3 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev