On Fri, May 06, 2016 at 08:56:08AM +0200, Samuel Iglesias Gons?lvez wrote: > When there is a mix of definitions of uniforms with 32-bit or 64-bit > data type sizes, the driver ends up doing misaligned access to double > based variables in the push constant buffer. > > To fix this, this patch pushes first all the 64-bit variables and > then the rest. Then, all the variables would be aligned to > its data type size. > > Signed-off-by: Samuel Iglesias Gonsálvez <sigles...@igalia.com> > --- > src/mesa/drivers/dri/i965/brw_fs.cpp | 113 > +++++++++++++++++++++++++---------- > 1 file changed, 83 insertions(+), 30 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp > b/src/mesa/drivers/dri/i965/brw_fs.cpp > index 65aa3c7..7eaf1b4 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp > @@ -39,6 +39,7 @@ > #include "brw_program.h" > #include "brw_dead_control_flow.h" > #include "compiler/glsl_types.h" > +#include "program/prog_parameter.h" > > using namespace brw; > > @@ -2004,6 +2005,45 @@ fs_visitor::compact_virtual_grfs() > return progress; > } > > +static void > +set_push_pull_constant_loc(unsigned uniform, int &chunk_start, bool > contiguous, > + int *push_constant_loc, int *pull_constant_loc, > + unsigned &num_push_constants, > + unsigned &num_pull_constants,
I don't remember us using writable references elsewhere, and vaguely recall there being a debate about this and decision to use pointers instead? > + const unsigned max_push_components, > + const unsigned max_chunk_size, > + struct brw_stage_prog_data *stage_prog_data) > +{ > + /* This is the first live uniform in the chunk */ > + if (chunk_start < 0) > + chunk_start = uniform; > + > + /* If this element does not need to be contiguous with the next, we > + * split at this point and everthing between chunk_start and u forms a ^ everything > + * single chunk. > + */ > + if (!contiguous) { > + unsigned chunk_size = uniform - chunk_start + 1; > + > + /* Decide whether we should push or pull this parameter. In the > + * Vulkan driver, push constants are explicitly exposed via the API > + * so we push everything. In GL, we only push small arrays. > + */ > + if (stage_prog_data->pull_param == NULL || > + (num_push_constants + chunk_size <= max_push_components && > + chunk_size <= max_chunk_size)) { > + assert(num_push_constants + chunk_size <= max_push_components); > + for (unsigned j = chunk_start; j <= uniform; j++) > + push_constant_loc[j] = num_push_constants++; > + } else { > + for (unsigned j = chunk_start; j <= uniform; j++) > + pull_constant_loc[j] = num_pull_constants++; > + } > + > + chunk_start = -1; > + } > +} > + > /** > * Assign UNIFORM file registers to either push constants or pull constants. > * > @@ -2022,6 +2062,8 @@ fs_visitor::assign_constant_locations() > > bool is_live[uniforms]; > memset(is_live, 0, sizeof(is_live)); > + bool is_live_64bit[uniforms]; > + memset(is_live_64bit, 0, sizeof(is_live_64bit)); > > /* For each uniform slot, a value of true indicates that the given slot > and > * the next slot must remain contiguous. This is used to keep us from > @@ -2054,14 +2096,21 @@ fs_visitor::assign_constant_locations() > for (unsigned j = constant_nr; j < last; j++) { > is_live[j] = true; > contiguous[j] = true; > + if (type_sz(inst->src[i].type) == 8) { > + is_live_64bit[j] = true; > + } > } > is_live[last] = true; > } else { > if (constant_nr >= 0 && constant_nr < (int) uniforms) { > int regs_read = inst->components_read(i) * > type_sz(inst->src[i].type) / 4; > - for (int j = 0; j < regs_read; j++) > + for (int j = 0; j < regs_read; j++) { > is_live[constant_nr + j] = true; > + if (type_sz(inst->src[i].type) == 8) { > + is_live_64bit[constant_nr + j] = true; > + } > + } > } > } > } > @@ -2090,43 +2139,46 @@ fs_visitor::assign_constant_locations() > pull_constant_loc = ralloc_array(mem_ctx, int, uniforms); > > int chunk_start = -1; > + > + Extra newline. > + /* First push 64-bit uniforms */ > for (unsigned u = 0; u < uniforms; u++) { > - push_constant_loc[u] = -1; > + if (!is_live[u] || !is_live_64bit[u]) > + continue; > + > pull_constant_loc[u] = -1; > + push_constant_loc[u] = -1; > > - if (!is_live[u]) > - continue; > + set_push_pull_constant_loc(u, chunk_start, contiguous[u], > + push_constant_loc, pull_constant_loc, > + num_push_constants, num_pull_constants, > + max_push_components, max_chunk_size, > + stage_prog_data); > > - /* This is the first live uniform in the chunk */ > - if (chunk_start < 0) > - chunk_start = u; > + } > > - /* If this element does not need to be contiguous with the next, we > - * split at this point and everthing between chunk_start and u forms a > - * single chunk. > - */ > - if (!contiguous[u]) { > - unsigned chunk_size = u - chunk_start + 1; > + /* Then push the rest of uniforms */ > + for (unsigned u = 0; u < uniforms; u++) { > + if (!is_live[u] || is_live_64bit[u]) > + continue; > > - /* Decide whether we should push or pull this parameter. In the > - * Vulkan driver, push constants are explicitly exposed via the API > - * so we push everything. In GL, we only push small arrays. > - */ > - if (stage_prog_data->pull_param == NULL || > - (num_push_constants + chunk_size <= max_push_components && > - chunk_size <= max_chunk_size)) { > - assert(num_push_constants + chunk_size <= max_push_components); > - for (unsigned j = chunk_start; j <= u; j++) > - push_constant_loc[j] = num_push_constants++; > - } else { > - for (unsigned j = chunk_start; j <= u; j++) > - pull_constant_loc[j] = num_pull_constants++; > - } > + pull_constant_loc[u] = -1; > + push_constant_loc[u] = -1; > > - chunk_start = -1; > - } > + set_push_pull_constant_loc(u, chunk_start, contiguous[u], > + push_constant_loc, pull_constant_loc, > + num_push_constants, num_pull_constants, > + max_push_components, max_chunk_size, > + stage_prog_data); > } > > + /* As the uniforms are going to be reordered, take the data from a > temporal > + * copy of the original param[]. > + */ > + gl_constant_value **param = rzalloc_array(mem_ctx, gl_constant_value*, > + stage_prog_data->nr_params); > + memcpy(param, stage_prog_data->param, > + sizeof(gl_constant_value*) * stage_prog_data->nr_params); > stage_prog_data->nr_params = num_push_constants; > stage_prog_data->nr_pull_params = num_pull_constants; > > @@ -2139,7 +2191,7 @@ fs_visitor::assign_constant_locations() > * having to make a copy. > */ > for (unsigned int i = 0; i < uniforms; i++) { > - const gl_constant_value *value = stage_prog_data->param[i]; > + const gl_constant_value *value = param[i]; > > if (pull_constant_loc[i] != -1) { > stage_prog_data->pull_param[pull_constant_loc[i]] = value; > @@ -2147,6 +2199,7 @@ fs_visitor::assign_constant_locations() > stage_prog_data->param[push_constant_loc[i]] = value; > } > } > + ralloc_free(param); > } > > /** > -- > 2.5.0 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev