On Monday, April 17, 2017 10:52:23 PM PDT Timothy Arceri wrote: > From: Timothy Arceri <timothy.arc...@collabora.com> > > If packing doesn't cross locations we can easily make use of > ARB_enhanced_layouts to do packing rather than using the GLSL IR > lowering pass lower_packed_varyings(). > > Shader-db Broadwell results: > > total instructions in shared programs: 12977822 -> 12977819 (-0.00%) > instructions in affected programs: 1871 -> 1868 (-0.16%) > helped: 4 > HURT: 3 > > total cycles in shared programs: 246567288 -> 246567668 (0.00%) > cycles in affected programs: 1370386 -> 1370766 (0.03%) > helped: 592 > HURT: 733 > --- > src/compiler/glsl/link_varyings.cpp | 68 > +++++++++++++++++++++++++++++++++++++ > 1 file changed, 68 insertions(+) > > diff --git a/src/compiler/glsl/link_varyings.cpp > b/src/compiler/glsl/link_varyings.cpp > index 37297be..5902421 100644 > --- a/src/compiler/glsl/link_varyings.cpp > +++ b/src/compiler/glsl/link_varyings.cpp > @@ -1210,20 +1210,21 @@ store_tfeedback_info(struct gl_context *ctx, struct > gl_shader_program *prog, > namespace { > > /** > * Data structure recording the relationship between outputs of one shader > * stage (the "producer") and inputs of another (the "consumer"). > */ > class varying_matches > { > public: > varying_matches(bool disable_varying_packing, bool xfb_enabled, > + bool enhanced_layouts_enabled, > gl_shader_stage producer_stage, > gl_shader_stage consumer_stage); > ~varying_matches(); > void record(ir_variable *producer_var, ir_variable *consumer_var); > unsigned assign_locations(struct gl_shader_program *prog, > uint8_t *components, > uint64_t reserved_slots); > void store_locations() const; > > private: > @@ -1243,20 +1244,22 @@ private: > > /** > * If true, this driver has transform feedback enabled. 
The transform > * feedback code requires at least some packing be done even when varying > * packing is disabled, fortunately where transform feedback requires > * packing it's safe to override the disabled setting. See > * is_varying_packing_safe(). > */ > const bool xfb_enabled; > > + const bool enhanced_layouts_enabled; > + > /** > * Enum representing the order in which varyings are packed within a > * packing class. > * > * Currently we pack vec4's first, then vec2's, then scalar values, then > * vec3's. This order ensures that the only vectors that are at risk of > * having to be "double parked" (split between two adjacent varying slots) > * are the vec3's. > */ > enum packing_order_enum { > @@ -1319,24 +1322,26 @@ private: > unsigned matches_capacity; > > gl_shader_stage producer_stage; > gl_shader_stage consumer_stage; > }; > > } /* anonymous namespace */ > > varying_matches::varying_matches(bool disable_varying_packing, > bool xfb_enabled, > + bool enhanced_layouts_enabled, > gl_shader_stage producer_stage, > gl_shader_stage consumer_stage) > : disable_varying_packing(disable_varying_packing), > xfb_enabled(xfb_enabled), > + enhanced_layouts_enabled(enhanced_layouts_enabled), > producer_stage(producer_stage), > consumer_stage(consumer_stage) > { > /* Note: this initial capacity is rather arbitrarily chosen to be large > * enough for many cases without wasting an unreasonable amount of space. > * varying_matches::record() will resize the array if there are more than > * this number of varyings. > */ > this->matches_capacity = 8; > this->matches = (match *) > @@ -1615,37 +1620,99 @@ varying_matches::assign_locations(struct > gl_shader_program *prog, > } > > > /** > * Update the producer and consumer shaders to reflect the locations > * assignments that were made by varying_matches::assign_locations(). 
> */ > void > varying_matches::store_locations() const > { > + /* Check is location needs to be packed with lower_packed_varyings() or if > + * we can just use ARB_enhanced_layouts packing. > + */ > + bool pack_loc[MAX_VARYINGS_INCL_PATCH] = { 0 }; > + const glsl_type *loc_type[MAX_VARYINGS_INCL_PATCH][4] = { {NULL, NULL} }; > + > for (unsigned i = 0; i < this->num_matches; i++) { > ir_variable *producer_var = this->matches[i].producer_var; > ir_variable *consumer_var = this->matches[i].consumer_var; > unsigned generic_location = this->matches[i].generic_location; > unsigned slot = generic_location / 4; > unsigned offset = generic_location % 4; > > if (producer_var) { > producer_var->data.location = VARYING_SLOT_VAR0 + slot; > producer_var->data.location_frac = offset; > } > > if (consumer_var) { > assert(consumer_var->data.location == -1); > consumer_var->data.location = VARYING_SLOT_VAR0 + slot; > consumer_var->data.location_frac = offset; > } > + > + /* Find locations suitable for native packing via > + * ARB_enhanced_layouts. > + */ > + if (producer_var && consumer_var) { > + if (enhanced_layouts_enabled) { > + const glsl_type *type = > + get_varying_type(producer_var, producer_stage); > + if (type->is_array() || type->is_matrix() || type->is_record() || > + type->is_double()) { > + unsigned comp_slots = type->component_slots() + offset; > + unsigned slots = comp_slots / 4; > + if (comp_slots % 4) > + slots += 1; > + > + for (unsigned j = 0; j < slots; j++) { > + pack_loc[slot + j] = true; > + } > + } else if (offset + type->vector_elements > 4) { > + pack_loc[slot] = true; > + pack_loc[slot + 1] = true; > + } else { > + loc_type[slot][offset] = type; > + } > + } > + } > + } > + > + /* Attempt to use ARB_enhanced_layouts for more efficient packing if > + * suitable. 
> + */ > + if (enhanced_layouts_enabled) { > + for (unsigned i = 0; i < this->num_matches; i++) { > + ir_variable *producer_var = this->matches[i].producer_var; > + ir_variable *consumer_var = this->matches[i].consumer_var; > + unsigned generic_location = this->matches[i].generic_location; > + unsigned slot = generic_location / 4; > + > + if (pack_loc[slot] || !producer_var || !consumer_var) > + continue; > + > + const glsl_type *type = > + get_varying_type(producer_var, producer_stage); > + bool type_match = true; > + for (unsigned j = 0; j < 4; j++) { > + if (loc_type[slot][j]) { > + if (type->base_type != loc_type[slot][j]->base_type) > + type_match = false; > + } > + } > + > + if (type_match) { > + producer_var->data.explicit_location = 1; > + consumer_var->data.explicit_location = 1;
You probably want to set

    producer_var->data.explicit_component = 1;
    consumer_var->data.explicit_component = 1;

here as well. It doesn't seem to affect code generation, but it does make ir_print_visitor actually print the component info.
signature.asc
Description: This is a digitally signed message part.
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev