Consider GLSL code such as: const ivec2 offsets[] = ivec2[](ivec2(-1, -1), ivec2(-1, 0), ivec2(-1, 1), ivec2(0, -1), ivec2(0, 0), ivec2(0, 1), ivec2(1, -1), ivec2(1, 0), ivec2(1, 1));
ivec2 offset = offsets[<non-constant expression>]; Both i965 and nv50 currently handle this very poorly. On i965, this becomes a pile of MOVs to load the immediate constants into registers, a pile of scratch writes to move the whole array to memory, and one scratch read to actually access the value - effectively the same as if it were a non-constant array. We'd much rather upload large blocks of constant data as uniform data, so drivers can simply upload the data via constbufs, and not have to populate it via shader instructions. This is currently non-optional because both i965 and nouveau benefit from it, and according to Marek radeonsi would benefit today as well. (According to Tom, radeonsi may want to handle this itself in the long term, but we can always add a flag when it becomes useful.) Improves performance in a terrain rendering microbenchmark by about 2x, and cuts the number of instructions in about half. Helps a lot of "Natural Selection 2" shaders, as well as one "HOARD" shader. total instructions in shared programs: 5473459 -> 5471765 (-0.03%) instructions in affected programs: 5880 -> 4186 (-28.81%) v2: Use ir_var_hidden to avoid exposing the new uniform via the GL uniform introspection API. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=77957 Signed-off-by: Kenneth Graunke <kenn...@whitecape.org> --- src/glsl/Makefile.sources | 1 + src/glsl/ir_optimization.h | 1 + src/glsl/linker.cpp | 2 + src/glsl/lower_const_arrays_to_uniforms.cpp | 102 ++++++++++++++++++++++++++++ 4 files changed, 106 insertions(+) create mode 100644 src/glsl/lower_const_arrays_to_uniforms.cpp diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index 0c55327..6aed52d 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -58,6 +58,7 @@ LIBGLSL_FILES = \ $(GLSL_SRCDIR)/loop_analysis.cpp \ $(GLSL_SRCDIR)/loop_controls.cpp \ $(GLSL_SRCDIR)/loop_unroll.cpp \ + $(GLSL_SRCDIR)/lower_const_arrays_to_uniforms.cpp \ $(GLSL_SRCDIR)/lower_clip_distance.cpp \ $(GLSL_SRCDIR)/lower_discard.cpp \ $(GLSL_SRCDIR)/lower_discard_flow.cpp \ diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h index e25857a..34e0b4b 100644 --- a/src/glsl/ir_optimization.h +++ b/src/glsl/ir_optimization.h @@ -114,6 +114,7 @@ bool lower_noise(exec_list *instructions); bool lower_variable_index_to_cond_assign(exec_list *instructions, bool lower_input, bool lower_output, bool lower_temp, bool lower_uniform); bool lower_quadop_vector(exec_list *instructions, bool dont_lower_swz); +bool lower_const_arrays_to_uniforms(exec_list *instructions); bool lower_clip_distance(gl_shader *shader); void lower_output_reads(exec_list *instructions); bool lower_packing_builtins(exec_list *instructions, int op_mask); diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index 2d31801..bd2aa3c 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -2678,6 +2678,8 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) &ctx->Const.ShaderCompilerOptions[i], ctx->Const.NativeIntegers)) ; + + lower_const_arrays_to_uniforms(prog->_LinkedShaders[i]->ir); } /* Check and validate stream emissions in geometry shaders */ diff --git a/src/glsl/lower_const_arrays_to_uniforms.cpp b/src/glsl/lower_const_arrays_to_uniforms.cpp new file mode 100644 index 0000000..b3c0ee2 --- /dev/null +++ b/src/glsl/lower_const_arrays_to_uniforms.cpp @@ -0,0 +1,102 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_const_arrays_to_uniforms.cpp + * + * Lower constant arrays to uniform arrays. + * + * Some driver backends (such as i965 and nouveau) don't handle constant arrays + * gracefully, instead treating them as ordinary writable temporary arrays. + * Since arrays can be large, this often means spilling them to scratch memory, + * which usually involves a large number of instructions. + * + * This must be called prior to link_set_uniform_initializers(); we need the + * linker to process our new uniform's constant initializer. + * + * This should be called after optimizations, since those can result in + * splitting and removing arrays that are indexed by constant expressions. + */ +#include "ir.h" +#include "ir_visitor.h" +#include "ir_rvalue_visitor.h" +#include "glsl_types.h" + +namespace { +class lower_const_array_visitor : public ir_rvalue_visitor { +public: + lower_const_array_visitor(exec_list *insts) + { + instructions = insts; + progress = false; + } + + bool run() + { + visit_list_elements(this, instructions); + return progress; + } + + void handle_rvalue(ir_rvalue **rvalue); + +private: + exec_list *instructions; + bool progress; +}; + +void +lower_const_array_visitor::handle_rvalue(ir_rvalue **rvalue) +{ + if (!*rvalue) + return; + + ir_constant *con = (*rvalue)->as_constant(); + if (!con || !con->type->is_array()) + return; + + void *mem_ctx = ralloc_parent(con); + + ir_variable *uni = + new(mem_ctx) ir_variable(con->type, "constarray", ir_var_uniform); + uni->constant_initializer = con; + uni->constant_value = con; + uni->data.has_initializer = true; + uni->data.how_declared = ir_var_hidden; + uni->data.read_only = true; + /* Assume the whole thing is accessed. */ + uni->data.max_array_access = uni->type->length - 1; + instructions->push_head(uni); + + *rvalue = new(mem_ctx) ir_dereference_variable(uni); + + progress = true; +} + +} /* anonymous namespace */ + +bool +lower_const_arrays_to_uniforms(exec_list *instructions) +{ + lower_const_array_visitor v(instructions); + return v.run(); +} -- 2.1.2 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev