On Tue, 2016-02-02 at 16:50 +1100, Timothy Arceri wrote: > In GLES 3.1+ and GL 4.4+ there is no guarantee that interpolation > qualifiers will match between stages so we cannot safely pack > varyings using the current packing pass in Mesa. > > We also disable packing on outward facing SSO as these too are > outside > the rule that guarantees the interpolation qualifiers match. > > We do however enable packing on individual arrays, structs, and > matrices as these are required by the transform feedback code and it > is still safe to do so. > > Finally we also enable packing when a varying is only used for > transform feedback and its not a SSO. > > This fixes all remaining rendering issues with the dEQP SSO tests, > the only issues remaining with thoses tests are to do with > validation. > > Note: There is still one remaining SSO bug that this patch doesn't > fix. > Their is a chance that VS -> TCS will have mismatching interfaces > because we pack VS output in case its used by transform feedback but > don't pack TCS input for performance reasons. This patch will make > the > situation better but doesn't fix it. > > V2: Make is_varying_packing_safe() a function in the varying_matches > class, fix spelling (Matt) and make sure to remove the outer array > when dealing with Geom and Tess shaders where appropriate. > Lastly fix piglit regression in new piglit test and document the > undefined behaviour it depends on: > arb_separate_shader_objects/execution/vs-gs-linking.shader_test > > Cc: Samuel Iglesias Gonsálvez <sigles...@igalia.com> > Cc: Tapani Pälli <tapani.pa...@intel.com> > --- > This patch applies on top of my ARB_enhanced_layouts component > packing > series [1]. > > The GLES 3.1 and GL 4.4 changes were tested by changing the code to > always disable packing and running the changes in Intel CI system. > > This patch would also make it possible for a backend to do less > packing > in GLSL IR if it wished to do so now that the transform feedback > requirements > have been isolated. > > [1] http://patchwork.freedesktop.org/series/2101/ > > src/compiler/glsl/ir.h | 7 + > src/compiler/glsl/ir_optimization.h | 2 +- > src/compiler/glsl/link_varyings.cpp | 214 > +++++++++++++++++++++------- > src/compiler/glsl/lower_packed_varyings.cpp | 37 +++-- > 4 files changed, 202 insertions(+), 58 deletions(-) > > diff --git a/src/compiler/glsl/ir.h b/src/compiler/glsl/ir.h > index e4513f0..83bb74b 100644 > --- a/src/compiler/glsl/ir.h > +++ b/src/compiler/glsl/ir.h > @@ -736,6 +736,13 @@ public: > unsigned is_unmatched_generic_inout:1; > > /** > + * Is this varying used only by transform feedback? > + * > + * This is used by the linker to decide if its safe to pack > the varying. > + */ > + unsigned is_xfb_only:1; > + > + /** > * If non-zero, then this variable may be packed along with > other variables > * into a single varying slot, so this offset should be > applied when > * accessing components. For example, an offset of 1 means > that the x > diff --git a/src/compiler/glsl/ir_optimization.h > b/src/compiler/glsl/ir_optimization.h > index a115c46..e201242 100644 > --- a/src/compiler/glsl/ir_optimization.h > +++ b/src/compiler/glsl/ir_optimization.h > @@ -126,7 +126,7 @@ void lower_packed_varyings(void *mem_ctx, > unsigned locations_used, ir_variable_mode > mode, > gl_shader *shader, unsigned > base_location, > bool disable_varying_packing, > - bool has_enhanced_layouts); > + bool xfb_enabled, bool > has_enhanced_layouts); > bool lower_vector_insert(exec_list *instructions, bool > lower_nonconstant_index); > bool lower_vector_derefs(gl_shader *shader); > void lower_named_interface_blocks(void *mem_ctx, gl_shader *shader); > diff --git a/src/compiler/glsl/link_varyings.cpp > b/src/compiler/glsl/link_varyings.cpp > index 2974e76..8efaa33 100644 > --- a/src/compiler/glsl/link_varyings.cpp > +++ b/src/compiler/glsl/link_varyings.cpp > @@ -878,7 +878,7 @@ namespace { > class varying_matches > { > public: > - varying_matches(bool disable_varying_packing, > + varying_matches(bool disable_varying_packing, bool xfb_enabled, > gl_shader_stage producer_stage, > gl_shader_stage consumer_stage); > ~varying_matches(); > @@ -888,14 +888,30 @@ public: > void store_locations() const; > > private: > + bool is_varying_packing_safe(const glsl_type *type, > + const ir_variable *var); > + > /** > * If true, this driver disables varying packing, so all varyings > need to > * be aligned on slot boundaries, and take up a number of slots > equal to > * their number of matrix columns times their array size. > + * > + * Packing may also be disabled because our current packing > method is not > + * safe in SSO or versions of OpenGL where interpolation > qualifiers are not > + * guaranteed to match across stages. > */ > const bool disable_varying_packing; > > /** > + * If true, this driver has transform feedback enabled. The > transform > + * feedback code requires at least some packing be done even when > varying > + * packing is disabled, fortunately where transform feedback > requires > + * packing it's safe to override the disabled setting. See > + * is_varying_packing_safe(). > + */ > + const bool xfb_enabled; > + > + /** > * Enum representing the order in which varyings are packed > within a > * packing class. > * > @@ -914,6 +930,7 @@ private: > static unsigned compute_packing_class(const ir_variable *var); > static packing_order_enum compute_packing_order(const ir_variable > *var); > static int match_comparator(const void *x_generic, const void > *y_generic); > + static int xfb_comparator(const void *x_generic, const void > *y_generic); > > /** > * Structure recording the relationship between a single producer > output > @@ -969,9 +986,11 @@ private: > } /* anonymous namespace */ > > varying_matches::varying_matches(bool disable_varying_packing, > + bool xfb_enabled, > gl_shader_stage producer_stage, > gl_shader_stage consumer_stage) > : disable_varying_packing(disable_varying_packing), > + xfb_enabled(xfb_enabled), > producer_stage(producer_stage), > consumer_stage(consumer_stage) > { > @@ -994,6 +1013,24 @@ varying_matches::~varying_matches() > > > /** > + * Packing is always safe on individual arrays, structure and > matices. It is
matrices Reviewed-by: Samuel Iglesias Gonsálvez <sigles...@igalia.com> Thanks, Sam > + * also safe if the varying is only used for transform feedback. > + */ > +bool > +varying_matches::is_varying_packing_safe(const glsl_type *type, > + const ir_variable *var) > +{ > + if (consumer_stage == MESA_SHADER_TESS_EVAL || > + consumer_stage == MESA_SHADER_TESS_CTRL || > + producer_stage == MESA_SHADER_TESS_CTRL) > + return false; > + > + return xfb_enabled && (type->is_array() || type->is_record() || > + type->is_matrix() || var- > >data.is_xfb_only); > +} > + > + > +/** > * Record the given producer/consumer variable pair in the list of > variables > * that should later be assigned locations. > * > @@ -1072,7 +1109,7 @@ varying_matches::record(ir_variable > *producer_var, ir_variable *consumer_var) > = this->compute_packing_class(var); > this->matches[this->num_matches].packing_order > = this->compute_packing_order(var); > - if (this->disable_varying_packing) { > + if (this->disable_varying_packing && > !is_varying_packing_safe(type, var)) { > unsigned slots = type->count_attribute_slots(false); > this->matches[this->num_matches].num_components = slots * 4; > } else { > @@ -1098,37 +1135,28 @@ varying_matches::assign_locations(struct > gl_shader_program *prog, > uint64_t reserved_slots, > bool separate_shader) > { > - /* We disable varying sorting for separate shader programs for > the > - * following reasons: > - * > - * 1/ All programs must sort the code in the same order to > guarantee the > - * interface matching. However varying_matches::record() will > change the > - * interpolation qualifier of some stages. > - * > - * 2/ GLSL version 4.50 removes the matching constrain on the > interpolation > - * qualifier. > - * > - * From Section 4.5 (Interpolation Qualifiers) of the GLSL 4.40 > spec: > - * > - * "The type and presence of interpolation qualifiers of > variables with > - * the same name declared in all linked shaders for the same > cross-stage > - * interface must match, otherwise the link command will fail. > - * > - * When comparing an output from one stage to an input of a > subsequent > - * stage, the input and output don't match if their > interpolation > - * qualifiers (or lack thereof) are not the same." > - * > - * "It is a link-time error if, within the same stage, the > interpolation > - * qualifiers of variables of the same name do not match." > + /* If packing has been disabled then we cannot safely sort the > varyings by > + * class as it may mean we are using a version of OpenGL where > + * interpolation qualifiers are not guaranteed to be matching > across > + * shaders, sorting in this case could result in mismatching > shader > + * interfaces. > + * When packing is disabled the sort orders varyings used by > transform > + * feedback first, but also depends on *undefined behaviour* of > qsort to > + * reverse the order of the varyings. See: xfb_comparator(). > */ > - if (!separate_shader) { > + if (!this->disable_varying_packing) { > /* Sort varying matches into an order that makes them easy to > pack. */ > qsort(this->matches, this->num_matches, sizeof(*this- > >matches), > &varying_matches::match_comparator); > + } else { > + /* Only sort varyings that are only used by transform > feedback. */ > + qsort(this->matches, this->num_matches, sizeof(*this- > >matches), > + &varying_matches::xfb_comparator); > } > > unsigned generic_location = 0; > unsigned generic_patch_location = MAX_VARYING*4; > + bool previous_var_xfb_only = false; > > for (unsigned i = 0; i < this->num_matches; i++) { > unsigned *location = &generic_location; > @@ -1152,16 +1180,30 @@ varying_matches::assign_locations(struct > gl_shader_program *prog, > /* Advance to the next slot if this varying has a different > packing > * class than the previous one, and we're not already on a > slot > * boundary. > + * > + * Also advance to the next slot if packing is disabled. This > makes sure > + * we don't assign varyings the same locations which is > possible > + * because we still pack individual arrays, records and > matrices even > + * when packing is disabled. Note we don't advance to the next > slot if > + * we can pack varyings together that are only used for > transform > + * feedback. > */ > - if (i > 0 && > - this->matches[i - 1].packing_class > - != this->matches[i].packing_class) { > + if ((this->disable_varying_packing && > + !(previous_var_xfb_only && var->data.is_xfb_only)) || > + (i > 0 && this->matches[i - 1].packing_class > + != this->matches[i].packing_class )) { > *location = ALIGN(*location, 4); > } > > + previous_var_xfb_only = var->data.is_xfb_only; > + > unsigned num_elements = type- > >count_attribute_slots(is_vertex_input); > - unsigned slot_end = this->disable_varying_packing ? 4 : > - type->without_array()->vector_elements; > + unsigned slot_end; > + if (this->disable_varying_packing && > + !is_varying_packing_safe(type, var)) > + slot_end = 4; > + else > + slot_end = type->without_array()->vector_elements; > slot_end += *location - 1; > > /* FIXME: We could be smarter in the below code and loop back > over > @@ -1185,7 +1227,8 @@ varying_matches::assign_locations(struct > gl_shader_program *prog, > /* Increase the slot to make sure there is enough room for > next > * array element. > */ > - if (this->disable_varying_packing) > + if (this->disable_varying_packing && > + !is_varying_packing_safe(type, var)) > slot_end += 4; > else > slot_end += type->without_array()->vector_elements; > @@ -1311,6 +1354,32 @@ varying_matches::match_comparator(const void > *x_generic, const void *y_generic) > > > /** > + * Comparison function passed to qsort() to sort varyings used only > by > + * transform feedback when packing of other varyings is disabled. > + */ > +int > +varying_matches::xfb_comparator(const void *x_generic, const void > *y_generic) > +{ > + const match *x = (const match *) x_generic; > + > + if (x->producer_var != NULL && x->producer_var->data.is_xfb_only) > + return match_comparator(x_generic, y_generic); > + > + /* FIXME: When the comparator returns 0 it means the elements > being > + * compared are equivalent. However the qsort documentation says: > + * > + * "The order of equivalent elements is undefined." > + * > + * In practice the sort ends up reversing the order of the > varyings which > + * means locations are also assigned in this reversed order and > happens to > + * be what we want. This is also whats happening in > + * varying_matches::match_comparator(). > + */ > + return 0; > +} > + > + > +/** > * Is the given variable a varying variable to be counted against > the > * limit in ctx->Const.MaxVarying? > * This includes variables such as texcoords, colors and generic > @@ -1628,26 +1697,67 @@ assign_varying_locations(struct gl_context > *ctx, > unsigned num_tfeedback_decls, > tfeedback_decl *tfeedback_decls) > { > - if (ctx->Const.DisableVaryingPacking) { > - /* Transform feedback code assumes varyings are packed, so if > the driver > - * has disabled varying packing, make sure it does not support > transform > - * feedback. > - */ > - assert(!ctx->Extensions.EXT_transform_feedback); > - } > + /* Transform feedback code assumes varying arrays are packed, so > if the > + * driver has disabled varying packing, make sure to at least > enable > + * packing required by transform feedback. > + */ > + bool xfb_enabled = ctx->Extensions.EXT_transform_feedback; > + > + /* Disable varying packing for GLES 3.1+ and GL 4.4+, there is no > guarantee > + * that interpolation qualifiers will match between shaders in > these > + * versions. We also disable packing for the outer facing > interfaces for > + * SSO as they also do not have the same restriction during > linking. > + * > + * Packing is still enabled on individual arrays, structs, and > matrices as > + * these are required by the transform feedback code and it is > still safe > + * to do so. We also enable packing when a varying is only used > for > + * transform feedback and its not a SSO. > + * > + * Varying packing currently only packs together varyings with > matching > + * interpolation qualifiers as the backends assume all packed > components > + * are to be processed in the same way. Therefore we cannot do > packing in > + * these versions of GL without the risk of mismatching > interfaces. > + * > + * From Section 4.5 (Interpolation Qualifiers) of the GLSL 4.30 > spec: > + * > + * "The type and presence of interpolation qualifiers of > variables with > + * the same name declared in all linked shaders for the same > cross-stage > + * interface must match, otherwise the link command will fail. > + * > + * When comparing an output from one stage to an input of a > subsequent > + * stage, the input and output don't match if their > interpolation > + * qualifiers (or lack thereof) are not the same." > + * > + * This text was also in at least revison 7 of the 4.40 spec but > is no > + * longer in revision 9 and not in the 4.50 spec. There is also > no text in > + * the GLSL ES spec that says these qualifiers must match across > stages. > + * > + * From Section 4.3.9 (Interpolation) of the GLSL ES 3.0 spec: > + * > + * "The type and presence of the interpolation qualifiers and > storage > + * qualifiers and invariant qualifiers of variables with the > same name > + * declared in all linked shaders must match, otherwise the > link command > + * will fail." > + * > + * This text is no longer in the 3.1 or 3.2 specs. > + */ > + bool disable_varying_packing = ctx->Const.DisableVaryingPacking; > + if ((ctx->API == API_OPENGL_CORE && ctx->Version >= 44) || > + (ctx->API == API_OPENGLES2 && ctx->Version >= 31) || > + (prog->SeparateShader && (producer == NULL || consumer == > NULL))) > + disable_varying_packing = true; > > /* Tessellation shaders treat inputs and outputs as shared memory > and can > * access inputs and outputs of other invocations. > * Therefore, they can't be lowered to temps easily (and > definitely not > * efficiently). > */ > - bool disable_varying_packing = > - ctx->Const.DisableVaryingPacking || > + disable_varying_packing = disable_varying_packing || > (consumer && consumer->Stage == MESA_SHADER_TESS_EVAL) || > (consumer && consumer->Stage == MESA_SHADER_TESS_CTRL) || > (producer && producer->Stage == MESA_SHADER_TESS_CTRL); > > - varying_matches matches(disable_varying_packing, > + varying_matches matches(disable_varying_packing, xfb_enabled, > producer ? producer->Stage : > (gl_shader_stage)-1, > consumer ? consumer->Stage : > (gl_shader_stage)-1); > hash_table *tfeedback_candidates > @@ -1767,8 +1877,10 @@ assign_varying_locations(struct gl_context > *ctx, > return false; > } > > - if (matched_candidate->toplevel_var- > >data.is_unmatched_generic_inout) > + if (matched_candidate->toplevel_var- > >data.is_unmatched_generic_inout) { > + matched_candidate->toplevel_var->data.is_xfb_only = 1; > matches.record(matched_candidate->toplevel_var, NULL); > + } > } > > const uint64_t reserved_slots = > @@ -1868,13 +1980,19 @@ assign_varying_locations(struct gl_context > *ctx, > if (vertex_attributes > 0) > lower_packed_varyings(mem_ctx, vertex_attributes, > ir_var_shader_in, producer, > - VERT_ATTRIB_GENERIC0, true, > + VERT_ATTRIB_GENERIC0, true, false, > ctx- > >Extensions.ARB_enhanced_layouts); > } > > + /* TCS inputs are not packed so don't enable packing required > for > + * transform feedback if the the consumer is a TCS. > + */ > + if (consumer && consumer->Stage == MESA_SHADER_TESS_CTRL) > + xfb_enabled = false; > + > lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_out, > producer, > - VARYING_SLOT_VAR0, > - disable_varying_packing, > + VARYING_SLOT_VAR0, > disable_varying_packing, > + xfb_enabled, > ctx->Extensions.ARB_enhanced_layouts); > } > > @@ -1891,13 +2009,13 @@ assign_varying_locations(struct gl_context > *ctx, > unsigned frag_outs = _mesa_bitcount_64(reserved_slots); > if (frag_outs > 0) > lower_packed_varyings(mem_ctx, frag_outs, > ir_var_shader_out, > - consumer, FRAG_RESULT_DATA0, true, > + consumer, FRAG_RESULT_DATA0, true, > false, > ctx- > >Extensions.ARB_enhanced_layouts); > } > > lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_in, > consumer, > - VARYING_SLOT_VAR0, > - disable_varying_packing, > + VARYING_SLOT_VAR0, > disable_varying_packing, > + xfb_enabled, > ctx->Extensions.ARB_enhanced_layouts); > } > > diff --git a/src/compiler/glsl/lower_packed_varyings.cpp > b/src/compiler/glsl/lower_packed_varyings.cpp > index d6d68fc..9cae548 100644 > --- a/src/compiler/glsl/lower_packed_varyings.cpp > +++ b/src/compiler/glsl/lower_packed_varyings.cpp > @@ -172,7 +172,8 @@ update_packed_array_type(const glsl_type *type, > const glsl_type *packed_type) > > static bool > needs_lowering(ir_variable *var, bool has_enhanced_layouts, > - bool disable_varying_packing) > + bool disable_varying_packing, bool xfb_enabled, > + bool is_outer_array_vert_idx) > { > /* Don't lower varying with explicit location unless > ARB_enhanced_layouts > * is enabled, also don't try to pack structs with explicit > location as > @@ -186,11 +187,16 @@ needs_lowering(ir_variable *var, bool > has_enhanced_layouts, > /* Don't disable packing for explicit locations when > ARB_enhanced_layouts > * is supported. > */ > - if (disable_varying_packing && !var->data.explicit_location) > + const glsl_type *type = > + is_outer_array_vert_idx ? var->type->fields.array : var->type; > + if (disable_varying_packing && !var->data.is_xfb_only && > + !var->data.explicit_location && > + !((type->is_array() || type->is_record() || type- > >is_matrix()) && > + xfb_enabled)) > return false; > > /* Things composed of vec4's don't need lowering everything else > does. */ > - const glsl_type *type = var->type->without_array(); > + type = type->without_array(); > if (type->vector_elements == 4 && !type->is_double()) > return false; > return true; > @@ -285,6 +291,7 @@ public: > exec_list *out_variables, > unsigned base_location, > bool disable_varying_packing, > + bool xfb_enabled, > bool has_enhanced_layouts); > > void run(struct gl_shader *shader); > @@ -353,6 +360,7 @@ private: > exec_list *out_variables; > > bool disable_varying_packing; > + bool xfb_enabled; > bool has_enhanced_layouts; > }; > > @@ -362,7 +370,8 @@ > lower_packed_varyings_visitor::lower_packed_varyings_visitor( > void *mem_ctx, unsigned locations_used, ir_variable_mode mode, > bool is_outer_array_vert_idx, exec_list *out_instructions, > exec_list *out_variables, unsigned base_location, > - bool disable_varying_packing, bool has_enhanced_layouts) > + bool disable_varying_packing, bool xfb_enabled, > + bool has_enhanced_layouts) > : mem_ctx(mem_ctx), > base_location(base_location), > locations_used(locations_used), > @@ -374,6 +383,7 @@ > lower_packed_varyings_visitor::lower_packed_varyings_visitor( > out_instructions(out_instructions), > out_variables(out_variables), > disable_varying_packing(disable_varying_packing), > + xfb_enabled(xfb_enabled), > has_enhanced_layouts(has_enhanced_layouts) > { > } > @@ -388,7 +398,8 @@ lower_packed_varyings_visitor::run(struct > gl_shader *shader) > > if (var->data.mode != this->mode || > var->data.location < (int) this->base_location || > - !needs_lowering(var, has_enhanced_layouts, > disable_varying_packing)) > + !needs_lowering(var, has_enhanced_layouts, > disable_varying_packing, > + xfb_enabled, is_outer_array_vert_idx)) > continue; > > /* This lowering pass is only capable of packing floats and > ints > @@ -1118,7 +1129,7 @@ void > lower_packed_varyings(void *mem_ctx, unsigned locations_used, > ir_variable_mode mode, gl_shader *shader, > unsigned base_location, bool > disable_varying_packing, > - bool has_enhanced_layouts) > + bool xfb_enabled, bool has_enhanced_layouts) > { > ir_function *main_func = shader->symbols->get_function("main"); > exec_list void_parameters; > @@ -1143,6 +1154,7 @@ lower_packed_varyings(void *mem_ctx, unsigned > locations_used, > &new_variables, > base_location, > disable_varying_packing, > + xfb_enabled, > has_enhanced_layouts); > visitor.run(shader); > if (mode == ir_var_shader_out) { > @@ -1189,9 +1201,16 @@ lower_packed_varyings(void *mem_ctx, unsigned > locations_used, > continue; > > if (var->data.mode != mode || > - var->data.location < (int) base_location || > - !needs_lowering(var, has_enhanced_layouts, true)) > - continue; > + var->data.location < (int) base_location) > + continue; > + > + bool is_outer_array_vertex_idx = false; > + if (!var->data.patch) > + is_outer_array_vertex_idx = true; > + > + if (!needs_lowering(var, has_enhanced_layouts, true, false, > + is_outer_array_vertex_idx)) > + continue; > > /* Clone the variable for program resource list before > * it gets modified and lost. _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev