On Nov 9, 2016 5:53 AM, "Timothy Arceri" <timothy.arc...@collabora.com> wrote: > > On Sat, 2016-11-05 at 13:03 -0400, Ilia Mirkin wrote: > > Instead of packing varyings into vec4's, keep track of how many > > components each slot uses and create varyings with matching types. > > This > > ensures that we don't end up using more components than the orginal > > shader, which is especially important for geometry shader output > > limits. > > > > Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> > > --- > > > > This comes up for nouveau, where the limit is 1024 output components > > for a GS, > > and the hardware complains *loudly* if you even think about going > > over. > > Since this is part of the justification for the change I'd like to see > this included in the commit message. IMO this screams this change is > important much more than the second paragraph does alone.
Ok. > > > > > src/compiler/glsl/ir_optimization.h | 4 +++- > > src/compiler/glsl/link_varyings.cpp | 16 +++++++++++++--- > > src/compiler/glsl/lower_packed_varyings.cpp | 21 ++++++++++++++++--- > > -- > > 3 files changed, 32 insertions(+), 9 deletions(-) > > > > diff --git a/src/compiler/glsl/ir_optimization.h > > b/src/compiler/glsl/ir_optimization.h > > index 6f2bc32..8cee418 100644 > > --- a/src/compiler/glsl/ir_optimization.h > > +++ b/src/compiler/glsl/ir_optimization.h > > @@ -136,7 +136,9 @@ void lower_shared_reference(struct > > gl_linked_shader *shader, > > void lower_ubo_reference(struct gl_linked_shader *shader, > > bool clamp_block_indices); > > void lower_packed_varyings(void *mem_ctx, > > - unsigned locations_used, ir_variable_mode > > mode, > > + unsigned locations_used, > > + const uint8_t *components, > > + ir_variable_mode mode, > > unsigned gs_input_vertices, > > gl_linked_shader *shader, > > bool disable_varying_packing, bool > > xfb_enabled); > > diff --git a/src/compiler/glsl/link_varyings.cpp > > b/src/compiler/glsl/link_varyings.cpp > > index 49843cc..a1dd133 100644 > > --- a/src/compiler/glsl/link_varyings.cpp > > +++ b/src/compiler/glsl/link_varyings.cpp > > @@ -1217,6 +1217,7 @@ public: > > ~varying_matches(); > > void record(ir_variable *producer_var, ir_variable > > *consumer_var); > > unsigned assign_locations(struct gl_shader_program *prog, > > + uint8_t *components, > > uint64_t reserved_slots); > > void store_locations() const; > > > > @@ -1482,6 +1483,7 @@ varying_matches::record(ir_variable > > *producer_var, ir_variable *consumer_var) > > */ > > unsigned > > varying_matches::assign_locations(struct gl_shader_program *prog, > > + uint8_t *components, > > uint64_t reserved_slots) > > { > > /* If packing has been disabled then we cannot safely sort the > > varyings by > > @@ -1585,6 +1587,12 @@ varying_matches::assign_locations(struct > > gl_shader_program *prog, > > var->name); > > } > > > > + if (slot_end < MAX_VARYINGS_INCL_PATCH * 4u) { > > + for (unsigned j = *location / 4u; j < slot_end / 4u; j++) > > + components[j] = 4; > > + components[slot_end / 4u] = (slot_end & 3) + 1; > > Everything else in the patch looks good but doesn't this make the > component size one component to big? The way I'm setting things up, components has values between 1..4. slot_end & 3 would be 0..3. Remember that slot_end is inclusive, too. > > > + } > > + > > this->matches[i].generic_location = *location; > > > > *location = slot_end + 1; > > @@ -2203,7 +2211,9 @@ assign_varying_locations(struct gl_context > > *ctx, > > } > > } > > > > - const unsigned slots_used = matches.assign_locations(prog, > > reserved_slots); > > + uint8_t components[MAX_VARYINGS_INCL_PATCH] = {0}; > > + const unsigned slots_used = matches.assign_locations( > > + prog, components, reserved_slots); > > matches.store_locations(); > > > > for (unsigned i = 0; i < num_tfeedback_decls; ++i) { > > @@ -2263,13 +2273,13 @@ assign_varying_locations(struct gl_context > > *ctx, > > } > > > > if (producer) { > > - lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_out, > > + lower_packed_varyings(mem_ctx, slots_used, components, > > ir_var_shader_out, > > 0, producer, disable_varying_packing, > > xfb_enabled); > > } > > > > if (consumer) { > > - lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_in, > > + lower_packed_varyings(mem_ctx, slots_used, components, > > ir_var_shader_in, > > consumer_vertices, consumer, > > disable_varying_packing, xfb_enabled); > > } > > diff --git a/src/compiler/glsl/lower_packed_varyings.cpp > > b/src/compiler/glsl/lower_packed_varyings.cpp > > index 19bbe57..b16f25f 100644 > > --- a/src/compiler/glsl/lower_packed_varyings.cpp > > +++ b/src/compiler/glsl/lower_packed_varyings.cpp > > @@ -164,7 +164,9 @@ namespace { > > class lower_packed_varyings_visitor > > { > > public: > > - lower_packed_varyings_visitor(void *mem_ctx, unsigned > > locations_used, > > + lower_packed_varyings_visitor(void *mem_ctx, > > + unsigned locations_used, > > + const uint8_t *components, > > ir_variable_mode mode, > > unsigned gs_input_vertices, > > exec_list *out_instructions, > > @@ -203,6 +205,8 @@ private: > > */ > > const unsigned locations_used; > > > > + const uint8_t* components; > > + > > /** > > * Array of pointers to the packed varyings that have been > > created for each > > * generic varying slot. NULL entries in this array indicate > > varying slots > > @@ -241,12 +245,14 @@ private: > > } /* anonymous namespace */ > > > > lower_packed_varyings_visitor::lower_packed_varyings_visitor( > > - void *mem_ctx, unsigned locations_used, ir_variable_mode mode, > > + void *mem_ctx, unsigned locations_used, const uint8_t > > *components, > > + ir_variable_mode mode, > > unsigned gs_input_vertices, exec_list *out_instructions, > > exec_list *out_variables, bool disable_varying_packing, > > bool xfb_enabled) > > : mem_ctx(mem_ctx), > > locations_used(locations_used), > > + components(components), > > packed_varyings((ir_variable **) > > rzalloc_array_size(mem_ctx, > > sizeof(*packed_varyings), > > locations_used)), > > @@ -607,10 +613,11 @@ > > lower_packed_varyings_visitor::get_packed_varying_deref( > > if (this->packed_varyings[slot] == NULL) { > > char *packed_name = ralloc_asprintf(this->mem_ctx, > > "packed:%s", name); > > const glsl_type *packed_type; > > + assert(components[slot] != 0); > > if (unpacked_var->is_interpolation_flat()) > > - packed_type = glsl_type::ivec4_type; > > + packed_type = glsl_type::get_instance(GLSL_TYPE_INT, > > components[slot], 1); > > else > > - packed_type = glsl_type::vec4_type; > > + packed_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, > > components[slot], 1); > > if (this->gs_input_vertices != 0) { > > packed_type = > > glsl_type::get_array_instance(packed_type, > > @@ -771,6 +778,7 @@ > > lower_packed_varyings_return_splicer::visit_leave(ir_return *ret) > > > > void > > lower_packed_varyings(void *mem_ctx, unsigned locations_used, > > + const uint8_t *components, > > ir_variable_mode mode, unsigned > > gs_input_vertices, > > gl_linked_shader *shader, bool > > disable_varying_packing, > > bool xfb_enabled) > > @@ -781,7 +789,10 @@ lower_packed_varyings(void *mem_ctx, unsigned > > locations_used, > > ir_function_signature *main_func_sig > > = main_func->matching_signature(NULL, &void_parameters, > > false); > > exec_list new_instructions, new_variables; > > - lower_packed_varyings_visitor visitor(mem_ctx, locations_used, > > mode, > > + lower_packed_varyings_visitor visitor(mem_ctx, > > + locations_used, > > + components, > > + mode, > > gs_input_vertices, > > &new_instructions, > > &new_variables,
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev