On Wed, Oct 07, 2015 at 07:11:45AM -0700, Kristian H?gsberg Kristensen wrote: > The initial motivation for this patch was to avoid calling > brw_cs_prog_local_id_payload_dwords() in gen7_cs_state.c from the > compiler. This commit ends up refactoring things a bit more so as to > split out the logic to build the local id payload to brw_fs.cpp. This > moves the payload building closer to the compiler code that uses the > payload layout and makes it avaiable to other users of the compiler. > > Signed-off-by: Kristian Høgsberg Kristensen <k...@bitplanet.net> > --- > src/mesa/drivers/dri/i965/brw_context.h | 1 + > src/mesa/drivers/dri/i965/brw_cs.h | 5 +- > src/mesa/drivers/dri/i965/brw_fs.cpp | 68 ++++++++++++++++++++++++-- > src/mesa/drivers/dri/i965/gen7_cs_state.c | 80 > ++++--------------------------- > 4 files changed, 76 insertions(+), 78 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_context.h > b/src/mesa/drivers/dri/i965/brw_context.h > index 0a29a69..1869f28 100644 > --- a/src/mesa/drivers/dri/i965/brw_context.h > +++ b/src/mesa/drivers/dri/i965/brw_context.h > @@ -484,6 +484,7 @@ struct brw_cs_prog_data { > unsigned simd_size; > bool uses_barrier; > bool uses_num_work_groups; > + unsigned local_invocation_id_regs; > > struct { > /** @{ > diff --git a/src/mesa/drivers/dri/i965/brw_cs.h > b/src/mesa/drivers/dri/i965/brw_cs.h > index 0c0ed2b..c07eb6c 100644 > --- a/src/mesa/drivers/dri/i965/brw_cs.h > +++ b/src/mesa/drivers/dri/i965/brw_cs.h > @@ -48,8 +48,9 @@ brw_cs_emit(struct brw_context *brw, > struct gl_shader_program *prog, > unsigned *final_assembly_size); > > -unsigned > -brw_cs_prog_local_id_payload_dwords(unsigned dispatch_width); > +void > +brw_cs_fill_local_id_payload(const struct brw_cs_prog_data *cs_prog_data, > + void *buffer, uint32_t threads, uint32_t > stride); > > #ifdef __cplusplus > } > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp > b/src/mesa/drivers/dri/i965/brw_fs.cpp > index 1dee888..b4125aa 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp > @@ -4718,20 +4718,43 @@ fs_visitor::setup_vs_payload() > payload.num_regs = 2; > } > > +/** > + * We are building the local ID push constant data using the simplest > possible > + * method. We simply push the local IDs directly as they should appear in the > + * registers for the uvec3 gl_LocalInvocationID variable. > + * > + * Therefore, for SIMD8, we use 3 full registers, and for SIMD16 we use 6 > + * registers worth of push constant space. > + * > + * Note: Any updates to brw_cs_prog_local_id_payload_dwords, > + * fill_local_id_payload or fs_visitor::emit_cs_local_invocation_id_setup > need > + * to coordinated. > + * > + * FINISHME: There are a few easy optimizations to consider. > + * > + * 1. If gl_WorkGroupSize x, y or z is 1, we can just use zero, and there is > + * no need for using push constant space for that dimension. > + * > + * 2. Since GL_MAX_COMPUTE_WORK_GROUP_SIZE is currently 1024 or less, we can > + * easily use 16-bit words rather than 32-bit dwords in the push constant > + * data. > + * > + * 3. If gl_WorkGroupSize x, y or z is small, then we can use bytes for > + * conveying the data, and thereby reduce push constant usage. > + * > + */ > void > fs_visitor::setup_cs_payload() > { > assert(devinfo->gen >= 7); > + brw_cs_prog_data *prog_data = (brw_cs_prog_data*) this->prog_data; > > payload.num_regs = 1; > > if (nir->info.system_values_read & SYSTEM_BIT_LOCAL_INVOCATION_ID) { > - const unsigned local_id_dwords = > - brw_cs_prog_local_id_payload_dwords(dispatch_width); > - assert((local_id_dwords & 0x7) == 0); > - const unsigned local_id_regs = local_id_dwords / 8; > + prog_data->local_invocation_id_regs = dispatch_width * 3 / 8; > payload.local_invocation_id_reg = payload.num_regs; > - payload.num_regs += local_id_regs; > + payload.num_regs += prog_data->local_invocation_id_regs; > } > } > > @@ -5171,6 +5194,41 @@ brw_wm_fs_emit(struct brw_context *brw, > return g.get_assembly(final_assembly_size); > } > > +void > +brw_cs_fill_local_id_payload(const struct brw_cs_prog_data *prog_data, > + void *buffer, uint32_t threads, uint32_t stride) > +{ > + if (prog_data->local_invocation_id_regs == 0) > + return; > + > + /* stride should be an integer number of registers, that is, a multiple of > + * 32 bytes. */
Comment closing on its own line and you could start the sentence with a capital 'S' as well. > + assert(stride % 32 == 0); > + > + unsigned x = 0, y = 0, z = 0; > + for (unsigned t = 0; t < threads; t++) { > + uint32_t *param = (uint32_t *) buffer + stride * t / 4; > + > + for (unsigned i = 0; i < prog_data->simd_size; i++) { > + param[0 * prog_data->simd_size + i] = x; > + param[1 * prog_data->simd_size + i] = y; > + param[2 * prog_data->simd_size + i] = z; > + > + x++; > + if (x == prog_data->local_size[0]) { > + x = 0; > + y++; > + if (y == prog_data->local_size[1]) { > + y = 0; > + z++; > + if (z == prog_data->local_size[2]) > + z = 0; > + } > + } > + } > + } > +} > + > fs_reg * > fs_visitor::emit_cs_local_invocation_id_setup() > { > diff --git a/src/mesa/drivers/dri/i965/gen7_cs_state.c > b/src/mesa/drivers/dri/i965/gen7_cs_state.c > index 5edc4fc..6aeb0cb 100644 > --- a/src/mesa/drivers/dri/i965/gen7_cs_state.c > +++ b/src/mesa/drivers/dri/i965/gen7_cs_state.c > @@ -70,10 +70,8 @@ brw_upload_cs_state(struct brw_context *brw) > > unsigned local_id_dwords = 0; > > - if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) { > - local_id_dwords = > - brw_cs_prog_local_id_payload_dwords(cs_prog_data->simd_size); > - } > + if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) > + local_id_dwords = cs_prog_data->local_invocation_id_regs * 8; > > unsigned push_constant_data_size = > (prog_data->nr_params + local_id_dwords) * sizeof(gl_constant_value); > @@ -191,63 +189,6 @@ const struct brw_tracked_state brw_cs_state = { > > > /** > - * We are building the local ID push constant data using the simplest > possible > - * method. We simply push the local IDs directly as they should appear in the > - * registers for the uvec3 gl_LocalInvocationID variable. > - * > - * Therefore, for SIMD8, we use 3 full registers, and for SIMD16 we use 6 > - * registers worth of push constant space. > - * > - * Note: Any updates to brw_cs_prog_local_id_payload_dwords, > - * fill_local_id_payload or fs_visitor::emit_cs_local_invocation_id_setup > need > - * to coordinated. > - * > - * FINISHME: There are a few easy optimizations to consider. > - * > - * 1. If gl_WorkGroupSize x, y or z is 1, we can just use zero, and there is > - * no need for using push constant space for that dimension. > - * > - * 2. Since GL_MAX_COMPUTE_WORK_GROUP_SIZE is currently 1024 or less, we can > - * easily use 16-bit words rather than 32-bit dwords in the push constant > - * data. > - * > - * 3. If gl_WorkGroupSize x, y or z is small, then we can use bytes for > - * conveying the data, and thereby reduce push constant usage. > - * > - */ > -unsigned > -brw_cs_prog_local_id_payload_dwords(unsigned dispatch_width) > -{ > - return 3 * dispatch_width; > -} > - > - > -static void > -fill_local_id_payload(const struct brw_cs_prog_data *cs_prog_data, > - void *buffer, unsigned *x, unsigned *y, unsigned *z) > -{ > - uint32_t *param = (uint32_t *)buffer; > - for (unsigned i = 0; i < cs_prog_data->simd_size; i++) { > - param[0 * cs_prog_data->simd_size + i] = *x; > - param[1 * cs_prog_data->simd_size + i] = *y; > - param[2 * cs_prog_data->simd_size + i] = *z; > - > - (*x)++; > - if (*x == cs_prog_data->local_size[0]) { > - *x = 0; > - (*y)++; > - if (*y == cs_prog_data->local_size[1]) { > - *y = 0; > - (*z)++; > - if (*z == cs_prog_data->local_size[2]) > - *z = 0; > - } > - } > - } > -} > - > - > -/** > * Creates a region containing the push constants for the CS on gen7+. > * > * Push constants are constant values (such as GLSL uniforms) that are > @@ -269,10 +210,8 @@ brw_upload_cs_push_constants(struct brw_context *brw, > (struct brw_stage_prog_data*) cs_prog_data; > unsigned local_id_dwords = 0; > > - if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) { > - local_id_dwords = > - brw_cs_prog_local_id_payload_dwords(cs_prog_data->simd_size); > - } > + if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) > + local_id_dwords = cs_prog_data->local_invocation_id_regs * 8; > > /* Updates the ParamaterValues[i] pointers for all parameters of the > * basic type of PROGRAM_STATE_VAR. > @@ -302,14 +241,13 @@ brw_upload_cs_push_constants(struct brw_context *brw, > > STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float)); > > + brw_cs_fill_local_id_payload(cs_prog_data, param, threads, > + reg_aligned_constant_size); > + > /* _NEW_PROGRAM_CONSTANTS */ > - unsigned x = 0, y = 0, z = 0; > for (t = 0; t < threads; t++) { > - gl_constant_value *next_param = ¶m[t * param_aligned_count]; > - if (local_id_dwords > 0) { > - fill_local_id_payload(cs_prog_data, (void*)next_param, &x, &y, > &z); > - next_param += local_id_dwords; > - } > + gl_constant_value *next_param = > + ¶m[t * param_aligned_count + local_id_dwords]; > for (i = 0; i < prog_data->nr_params; i++) { > next_param[i] = *prog_data->param[i]; > } > -- > 2.4.3 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev