On Friday, December 04, 2015 04:45:03 PM Jason Ekstrand wrote: > On Wed, Dec 2, 2015 at 4:15 PM, Kenneth Graunke <kenn...@whitecape.org> wrote: > > Tessellation control shaders need to be careful when writing outputs. > > Because multiple threads can concurrently write the same output > > variables, we need to only write the exact components we were told. > > > > Traditionally, for sub-vector writes, we've read the whole vector, > > updated the temporary, and written the whole vector back. This breaks > > down with concurrent access. > > > > This patch prepares the way for a solution by adding a writemask field > > to store_var intrinsics, as well as the other store intrinsics. It then > > updates all produces to emit a writemask of "all channels enabled". It > > updates nir_lower_io to copy the writemask to output store intrinsics. > > > > Finally, it updates nir_lower_vars_to_ssa to handle partial writemasks > > by doing a read-modify-write cycle (which is safe, because local > > variables are specific to a single thread). > > > > This should have no functional change, since no one actually emits > > partial writemasks yet. > > > > Signed-off-by: Kenneth Graunke <kenn...@whitecape.org> > > --- > > src/gallium/auxiliary/nir/tgsi_to_nir.c | 1 + > > .../drivers/freedreno/ir3/ir3_compiler_nir.c | 6 +++ > > src/glsl/nir/glsl_to_nir.cpp | 2 + > > src/glsl/nir/nir_builder.h | 4 +- > > src/glsl/nir/nir_intrinsics.h | 14 +++---- > > src/glsl/nir/nir_lower_gs_intrinsics.c | 3 +- > > src/glsl/nir/nir_lower_io.c | 2 + > > src/glsl/nir/nir_lower_locals_to_regs.c | 2 +- > > src/glsl/nir/nir_lower_var_copies.c | 1 + > > src/glsl/nir/nir_lower_vars_to_ssa.c | 46 > > ++++++++++++++++------ > > src/glsl/nir/nir_validate.c | 1 + > > src/mesa/program/prog_to_nir.c | 2 + > > 12 files changed, 63 insertions(+), 21 deletions(-) > > > > Technically, I don't think I need the ir3 changes here - it should work > > without any changes. I think I was just overzealously preparing things > > to handle writemasks before I realized there shouldn't be any. > > > > But, it might be worth doing anyway. Not sure. > > > > Jason and I have already talked about the fact that we're conflicting. > > I like what he's doing, and he suggested this; we just need to figure out > > what lands first. I decided to send these out anyway for people to look > > at and start reviewing, since there's a lot to do there. We'll sort it > > out in v2. > > > > diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c > > b/src/gallium/auxiliary/nir/tgsi_to_nir.c > > index 86c2ffa..f5547c8 100644 > > --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c > > +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c > > @@ -1894,6 +1894,7 @@ ttn_emit_instruction(struct ttn_compile *c) > > &tgsi_dst->Indirect : NULL; > > > > store->num_components = 4; > > + store->const_index[0] = 0xf; > > store->variables[0] = ttn_array_deref(c, store, var, offset, > > indirect); > > store->src[0] = nir_src_for_reg(dest.dest.reg.reg); > > > > diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c > > b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c > > index 8617704..3beed0e 100644 > > --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c > > +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c > > @@ -1314,6 +1314,9 @@ emit_intrinisic_store_var(struct ir3_compile *ctx, > > nir_intrinsic_instr *intr) > > case nir_deref_array_type_direct: > > /* direct access does not require anything special: */ > > for (int i = 0; i < intr->num_components; i++) { > > + if (!(intr->const_index[0] & (1 << i))) > > + continue; > > + > > unsigned n = darr->base_offset * 4 + i; > > compile_assert(ctx, n < arr->length); > > arr->arr[n] = src[i]; > > @@ -1326,6 +1329,9 @@ emit_intrinisic_store_var(struct ir3_compile *ctx, > > nir_intrinsic_instr *intr) > > struct ir3_instruction *addr = > > get_addr(ctx, get_src(ctx, > > &darr->indirect)[0]); > > for (int i = 0; i < intr->num_components; i++) { > > + if (!(intr->const_index[0] & (1 << i))) > > + continue; > > + > > struct ir3_instruction *store; > > unsigned n = darr->base_offset * 4 + i; > > compile_assert(ctx, n < arr->length); > > diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp > > index 45d045c..5c84b0d 100644 > > --- a/src/glsl/nir/glsl_to_nir.cpp > > +++ b/src/glsl/nir/glsl_to_nir.cpp > > @@ -982,6 +982,7 @@ nir_visitor::visit(ir_call *ir) > > nir_intrinsic_instr *store_instr = > > nir_intrinsic_instr_create(shader, nir_intrinsic_store_var); > > store_instr->num_components = > > ir->return_deref->type->vector_elements; > > + store_instr->const_index[0] = (1 << store_instr->num_components) > > - 1; > > > > store_instr->variables[0] = > > evaluate_deref(&store_instr->instr, ir->return_deref); > > @@ -1080,6 +1081,7 @@ nir_visitor::visit(ir_assignment *ir) > > nir_intrinsic_instr *store = > > nir_intrinsic_instr_create(this->shader, nir_intrinsic_store_var); > > store->num_components = ir->lhs->type->vector_elements; > > + store->const_index[0] = (1 << store->num_components) - 1; > > nir_deref *store_deref = nir_copy_deref(store, &lhs_deref->deref); > > store->variables[0] = nir_deref_as_var(store_deref); > > store->src[0] = nir_src_for_ssa(src); > > diff --git a/src/glsl/nir/nir_builder.h b/src/glsl/nir/nir_builder.h > > index b909f483..6478aad 100644 > > --- a/src/glsl/nir/nir_builder.h > > +++ b/src/glsl/nir/nir_builder.h > > @@ -310,13 +310,15 @@ nir_load_var(nir_builder *build, nir_variable *var) > > } > > > > static inline void > > -nir_store_var(nir_builder *build, nir_variable *var, nir_ssa_def *value) > > +nir_store_var(nir_builder *build, nir_variable *var, nir_ssa_def *value, > > + unsigned writemask) > > { > > const unsigned num_components = glsl_get_vector_elements(var->type); > > > > nir_intrinsic_instr *store = > > nir_intrinsic_instr_create(build->shader, nir_intrinsic_store_var); > > store->num_components = num_components; > > + store->const_index[0] = writemask; > > store->variables[0] = nir_deref_var_create(store, var); > > store->src[0] = nir_src_for_ssa(value); > > nir_builder_instr_insert(build, &store->instr); > > diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h > > index b2565c5..013f2ac 100644 > > --- a/src/glsl/nir/nir_intrinsics.h > > +++ b/src/glsl/nir/nir_intrinsics.h > > @@ -43,7 +43,7 @@ > > > > > > INTRINSIC(load_var, 0, ARR(), true, 0, 1, 0, NIR_INTRINSIC_CAN_ELIMINATE) > > -INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 0, 0) > > +INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 1, 0) > > INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, 0) > > > > /* > > @@ -266,16 +266,16 @@ LOAD(per_vertex_output, 1, 1, > > NIR_INTRINSIC_CAN_ELIMINATE) > > * block index and an extra index with the writemask to use. > > */ > > > > -#define STORE(name, extra_srcs, extra_srcs_size, extra_indices, flags) \ > > +#define STORE(name, extra_srcs, extra_srcs_size, flags) \ > > INTRINSIC(store_##name, 1 + extra_srcs, \ > > ARR(0, extra_srcs_size, extra_srcs_size, extra_srcs_size), \ > > - false, 0, 0, 1 + extra_indices, flags) \ > > + false, 0, 0, 2, flags) \ > > INTRINSIC(store_##name##_indirect, 2 + extra_srcs, \ > > ARR(0, 1, extra_srcs_size, extra_srcs_size), \ > > - false, 0, 0, 1 + extra_indices, flags) > > + false, 0, 0, 2, flags) > > > > -STORE(output, 0, 0, 0, 0) > > -STORE(per_vertex_output, 1, 1, 0, 0) > > -STORE(ssbo, 1, 1, 1, 0) > > +STORE(output, 0, 0, 0) > > +STORE(per_vertex_output, 1, 1, 0) > > +STORE(ssbo, 1, 1, 0) > > > > LAST_INTRINSIC(store_ssbo_indirect) > > diff --git a/src/glsl/nir/nir_lower_gs_intrinsics.c > > b/src/glsl/nir/nir_lower_gs_intrinsics.c > > index e0d0678..7cde839 100644 > > --- a/src/glsl/nir/nir_lower_gs_intrinsics.c > > +++ b/src/glsl/nir/nir_lower_gs_intrinsics.c > > @@ -99,7 +99,8 @@ rewrite_emit_vertex(nir_intrinsic_instr *intrin, struct > > state *state) > > > > /* Increment the vertex count by 1 */ > > nir_store_var(b, state->vertex_count_var, > > - nir_iadd(b, count, nir_imm_int(b, 1))); > > + nir_iadd(b, count, nir_imm_int(b, 1)), > > + 1); /* .x */ > > I'd rather 0x1 since it is a bitfield.
I'd rather WRITEMASK_X, but I think we'd have to move the defines from prog_instruction.h to shader_enums.h...
signature.asc
Description: This is a digitally signed message part.
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev