On Sat, Sep 19, 2015 at 03:50:49PM -0700, Jordan Justen wrote: > Signed-off-by: Jordan Justen <jordan.l.jus...@intel.com> > --- > src/mesa/drivers/dri/i965/brw_compute.c | 57 > ++++++++++++++++++++++++++++++--- > src/mesa/drivers/dri/i965/brw_defines.h | 2 ++ > src/mesa/drivers/dri/i965/intel_reg.h | 5 +++ > 3 files changed, 60 insertions(+), 4 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_compute.c > b/src/mesa/drivers/dri/i965/brw_compute.c > index 5693ab5..5641823 100644 > --- a/src/mesa/drivers/dri/i965/brw_compute.c > +++ b/src/mesa/drivers/dri/i965/brw_compute.c > @@ -31,14 +31,46 @@ > #include "brw_draw.h" > #include "brw_state.h" > #include "intel_batchbuffer.h" > +#include "intel_buffer_objects.h" > #include "brw_defines.h" > > > static void > -brw_emit_gpgpu_walker(struct brw_context *brw, const GLuint *num_groups) > +brw_emit_gpgpu_walker(struct brw_context *brw, > + const void *compute_param, > + bool indirect) > { > const struct brw_cs_prog_data *prog_data = brw->cs.prog_data; > > + const GLuint *num_groups; > + uint32_t indirect_flag; > + > + if (!indirect) { > + num_groups = (const GLuint *)compute_param; > + indirect_flag = 0; > + } else { > + GLintptr indirect_offset = *(GLintptr*)compute_param;
I would call this as brw_dispatch_compute_common(ctx, indirect, true); from brw_dispatch_compute_indirect() instead of passing the address of indirect, and then just say GLintptr indirect_offset = (GLintptr)compute_param; here. GLintptr is sized so that this cast is guaranteed to work. With that, series Reviewed-by: Kristian Høgsberg <k...@bitplanet.net> > + static const GLuint indirect_group_counts[3] = { 0, 0, 0 }; > + num_groups = indirect_group_counts; > + > + struct gl_buffer_object *indirect_buffer = > brw->ctx.DispatchIndirectBuffer; > + drm_intel_bo *bo = intel_bufferobj_buffer(brw, > + intel_buffer_object(indirect_buffer), > + indirect_offset, 3 * sizeof(GLuint)); > + > + indirect_flag = GEN7_GPGPU_INDIRECT_PARAMETER_ENABLE; > + > + brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMX, bo, > + I915_GEM_DOMAIN_VERTEX, 0, > + indirect_offset + 0); > + brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMY, bo, > + I915_GEM_DOMAIN_VERTEX, 0, > + indirect_offset + 4); > + brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMZ, bo, > + I915_GEM_DOMAIN_VERTEX, 0, > + indirect_offset + 8); > + } > + > const unsigned simd_size = prog_data->simd_size; > unsigned group_size = prog_data->local_size[0] * > prog_data->local_size[1] * prog_data->local_size[2]; > @@ -52,7 +84,7 @@ brw_emit_gpgpu_walker(struct brw_context *brw, const GLuint > *num_groups) > > uint32_t dwords = brw->gen < 8 ? 
11 : 15; > BEGIN_BATCH(dwords); > - OUT_BATCH(GPGPU_WALKER << 16 | (dwords - 2)); > + OUT_BATCH(GPGPU_WALKER << 16 | (dwords - 2) | indirect_flag); > OUT_BATCH(0); > if (brw->gen >= 8) { > OUT_BATCH(0); /* Indirect Data Length */ > @@ -83,7 +115,9 @@ brw_emit_gpgpu_walker(struct brw_context *brw, const > GLuint *num_groups) > > > static void > -brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups) > +brw_dispatch_compute_common(struct gl_context *ctx, > + const void *compute_param, > + bool indirect) > { > struct brw_context *brw = brw_context(ctx); > int estimated_buffer_space_needed; > @@ -117,7 +151,7 @@ brw_dispatch_compute(struct gl_context *ctx, const GLuint > *num_groups) > brw->no_batch_wrap = true; > brw_upload_compute_state(brw); > > - brw_emit_gpgpu_walker(brw, num_groups); > + brw_emit_gpgpu_walker(brw, compute_param, indirect); > > brw->no_batch_wrap = false; > > @@ -155,9 +189,24 @@ brw_dispatch_compute(struct gl_context *ctx, const > GLuint *num_groups) > */ > } > > +static void > +brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups) { > + brw_dispatch_compute_common(ctx, > + num_groups, > + false); > +} > + > +static void > +brw_dispatch_compute_indirect(struct gl_context *ctx, GLintptr indirect) > +{ > + brw_dispatch_compute_common(ctx, > + &indirect, > + true); > +} > > void > brw_init_compute_functions(struct dd_function_table *functions) > { > functions->DispatchCompute = brw_dispatch_compute; > + functions->DispatchComputeIndirect = brw_dispatch_compute_indirect; > } > diff --git a/src/mesa/drivers/dri/i965/brw_defines.h > b/src/mesa/drivers/dri/i965/brw_defines.h > index 8fc8ceb..2de51d0 100644 > --- a/src/mesa/drivers/dri/i965/brw_defines.h > +++ b/src/mesa/drivers/dri/i965/brw_defines.h > @@ -2698,6 +2698,8 @@ enum brw_wm_barycentric_interp_mode { > # define GEN8_MEDIA_GPGPU_THREAD_COUNT_MASK INTEL_MASK(9, 0) > #define MEDIA_STATE_FLUSH 0x7004 > #define GPGPU_WALKER 0x7105 > +/* GEN7 DW0 */ > +# define 
GEN7_GPGPU_INDIRECT_PARAMETER_ENABLE (1 << 10) > /* GEN8+ DW2 */ > # define GPGPU_WALKER_INDIRECT_LENGTH_SHIFT 0 > # define GPGPU_WALKER_INDIRECT_LENGTH_MASK INTEL_MASK(15, 0) > diff --git a/src/mesa/drivers/dri/i965/intel_reg.h > b/src/mesa/drivers/dri/i965/intel_reg.h > index 58007d3..a261c2b 100644 > --- a/src/mesa/drivers/dri/i965/intel_reg.h > +++ b/src/mesa/drivers/dri/i965/intel_reg.h > @@ -173,6 +173,11 @@ > #define GEN7_3DPRIM_START_INSTANCE 0x243C > #define GEN7_3DPRIM_BASE_VERTEX 0x2440 > > +/* Auto-Compute / Indirect Registers */ > +#define GEN7_GPGPU_DISPATCHDIMX 0x2500 > +#define GEN7_GPGPU_DISPATCHDIMY 0x2504 > +#define GEN7_GPGPU_DISPATCHDIMZ 0x2508 > + > #define GEN7_CACHE_MODE_1 0x7004 > # define GEN8_HIZ_NP_PMA_FIX_ENABLE (1 << 11) > # define GEN8_HIZ_NP_EARLY_Z_FAILS_DISABLE (1 << 13) > -- > 2.5.1 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev