Looks good to me :) Reviewed-by: Plamena Manolova <plamena.manol...@intel.com>
On Fri, Nov 16, 2018 at 7:02 AM Jason Ekstrand <ja...@jlekstrand.net> wrote: > It's not at all intel-specific; the formula is dictated by OpenGL and > Vulkan. The only intel-specific thing is that we need the lowering. As > a nice side-effect, the new version is variable-group-size ready. > > Cc: Plamena Manolova <plamena.n.manol...@gmail.com> > --- > src/compiler/nir/nir.h | 1 + > src/compiler/nir/nir_lower_system_values.c | 49 ++++++++++++++++++- > src/intel/compiler/brw_compiler.c | 1 + > .../compiler/brw_nir_lower_cs_intrinsics.c | 33 ------------- > 4 files changed, 50 insertions(+), 34 deletions(-) > > diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h > index b0cff50eaf2..1dd605010f6 100644 > --- a/src/compiler/nir/nir.h > +++ b/src/compiler/nir/nir.h > @@ -2178,6 +2178,7 @@ typedef struct nir_shader_compiler_options { > bool lower_helper_invocation; > > bool lower_cs_local_index_from_id; > + bool lower_cs_local_id_from_index; > > bool lower_device_index_to_zero; > > diff --git a/src/compiler/nir/nir_lower_system_values.c > b/src/compiler/nir/nir_lower_system_values.c > index fbc40573579..08a9e8be44a 100644 > --- a/src/compiler/nir/nir_lower_system_values.c > +++ b/src/compiler/nir/nir_lower_system_values.c > @@ -51,6 +51,45 @@ build_local_group_size(nir_builder *b) > return local_size; > } > > +static nir_ssa_def * > +build_local_invocation_id(nir_builder *b) > +{ > + if (b->shader->options->lower_cs_local_id_from_index) { > + /* We lower gl_LocalInvocationID from gl_LocalInvocationIndex based > + * on this formula: > + * > + * gl_LocalInvocationID.x = > + * gl_LocalInvocationIndex % gl_WorkGroupSize.x; > + * gl_LocalInvocationID.y = > + * (gl_LocalInvocationIndex / gl_WorkGroupSize.x) % > + * gl_WorkGroupSize.y; > + * gl_LocalInvocationID.z = > + * (gl_LocalInvocationIndex / > + * (gl_WorkGroupSize.x * gl_WorkGroupSize.y)) % > + * gl_WorkGroupSize.z; > + * > + * However, the final % gl_WorkGroupSize.z does nothing unless we > + * accidentally end up with a gl_LocalInvocationIndex that is too > + * large so it can safely be omitted. > + */ > + nir_ssa_def *local_index = nir_load_local_invocation_index(b); > + nir_ssa_def *local_size = build_local_group_size(b); > + > + nir_ssa_def *id_x, *id_y, *id_z; > + id_x = nir_umod(b, local_index, > + nir_channel(b, local_size, 0)); > + id_y = nir_umod(b, nir_udiv(b, local_index, > + nir_channel(b, local_size, 0)), > + nir_channel(b, local_size, 1)); > + id_z = nir_udiv(b, local_index, > + nir_imul(b, nir_channel(b, local_size, 0), > + nir_channel(b, local_size, 1))); > + return nir_vec3(b, id_x, id_y, id_z); > + } else { > + return nir_load_local_invocation_id(b); > + } > +} > + > static bool > convert_block(nir_block *block, nir_builder *b) > { > @@ -91,7 +130,7 @@ convert_block(nir_block *block, nir_builder *b) > */ > nir_ssa_def *group_size = build_local_group_size(b); > nir_ssa_def *group_id = nir_load_work_group_id(b); > - nir_ssa_def *local_id = nir_load_local_invocation_id(b); > + nir_ssa_def *local_id = build_local_invocation_id(b); > > sysval = nir_iadd(b, nir_imul(b, group_id, group_size), > local_id); > break; > @@ -126,6 +165,14 @@ convert_block(nir_block *block, nir_builder *b) > break; > } > > + case SYSTEM_VALUE_LOCAL_INVOCATION_ID: > + /* If lower_cs_local_id_from_index is true, then we derive the > local > + * index from the local id. > + */ > + if (b->shader->options->lower_cs_local_id_from_index) > + sysval = build_local_invocation_id(b); > + break; > + > case SYSTEM_VALUE_LOCAL_GROUP_SIZE: { > sysval = build_local_group_size(b); > break; > diff --git a/src/intel/compiler/brw_compiler.c > b/src/intel/compiler/brw_compiler.c > index e863b08b991..fe632c5badc 100644 > --- a/src/intel/compiler/brw_compiler.c > +++ b/src/intel/compiler/brw_compiler.c > @@ -42,6 +42,7 @@ > .lower_fdiv = true, > \ > .lower_flrp64 = true, > \ > .lower_ldexp = true, > \ > + .lower_cs_local_id_from_index = true, > \ > .lower_device_index_to_zero = true, > \ > .native_integers = true, > \ > .use_interpolated_input_intrinsics = true, > \ > diff --git a/src/intel/compiler/brw_nir_lower_cs_intrinsics.c > b/src/intel/compiler/brw_nir_lower_cs_intrinsics.c > index bfbdea0e8fa..fab5edc893f 100644 > --- a/src/intel/compiler/brw_nir_lower_cs_intrinsics.c > +++ b/src/intel/compiler/brw_nir_lower_cs_intrinsics.c > @@ -70,39 +70,6 @@ lower_cs_intrinsics_convert_block(struct > lower_intrinsics_state *state, > break; > } > > - case nir_intrinsic_load_local_invocation_id: { > - /* We lower gl_LocalInvocationID from gl_LocalInvocationIndex > based > - * on this formula: > - * > - * gl_LocalInvocationID.x = > - * gl_LocalInvocationIndex % gl_WorkGroupSize.x; > - * gl_LocalInvocationID.y = > - * (gl_LocalInvocationIndex / gl_WorkGroupSize.x) % > - * gl_WorkGroupSize.y; > - * gl_LocalInvocationID.z = > - * (gl_LocalInvocationIndex / > - * (gl_WorkGroupSize.x * gl_WorkGroupSize.y)) % > - * gl_WorkGroupSize.z; > - */ > - unsigned *size = nir->info.cs.local_size; > - > - nir_ssa_def *local_index = nir_load_local_invocation_index(b); > - > - nir_const_value uvec3; > - memset(&uvec3, 0, sizeof(uvec3)); > - uvec3.u32[0] = 1; > - uvec3.u32[1] = size[0]; > - uvec3.u32[2] = size[0] * size[1]; > - nir_ssa_def *div_val = nir_build_imm(b, 3, 32, uvec3); > - uvec3.u32[0] = size[0]; > - uvec3.u32[1] = size[1]; > - uvec3.u32[2] = size[2]; > - nir_ssa_def *mod_val = nir_build_imm(b, 3, 32, uvec3); > - > - sysval = nir_umod(b, nir_udiv(b, local_index, div_val), mod_val); > - break; > - } > - > case nir_intrinsic_load_subgroup_id: > if (state->local_workgroup_size > 8) > continue; > -- > 2.19.1 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev