On Thu, Jul 6, 2017 at 5:22 PM, Kenneth Graunke <kenn...@whitecape.org> wrote: > This actually takes advantage of the newly pushed UBO data, avoiding > pull loads. > > XXX: quote performance numbers > --- > src/intel/compiler/brw_fs.cpp | 35 ++++++++++++++++++++++++++++++++++- > src/intel/compiler/brw_fs.h | 2 ++ > src/intel/compiler/brw_fs_nir.cpp | 28 ++++++++++++++++++++++++++++ > 3 files changed, 64 insertions(+), 1 deletion(-) > > diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp > index 49e714f1c1f..68648eda64d 100644 > --- a/src/intel/compiler/brw_fs.cpp > +++ b/src/intel/compiler/brw_fs.cpp > @@ -1386,7 +1386,9 @@ fs_visitor::assign_curb_setup() > unsigned uniform_push_length = DIV_ROUND_UP(stage_prog_data->nr_params, > 8); > > unsigned ubo_push_length = 0; > + unsigned ubo_push_start[4]; > for (int i = 0; i < 4; i++) { > + ubo_push_start[i] = 8 * (ubo_push_length + uniform_push_length); > ubo_push_length += stage_prog_data->ubo_ranges[i].length; > } > > @@ -1398,7 +1400,11 @@ fs_visitor::assign_curb_setup() > if (inst->src[i].file == UNIFORM) { > int uniform_nr = inst->src[i].nr + inst->src[i].offset / 4; > int constant_nr; > - if (uniform_nr >= 0 && uniform_nr < (int) uniforms) { > + if (inst->src[i].nr >= UBO_START) { > + /* constant_nr is in 32-bit units, the rest are in bytes */ > + constant_nr = ubo_push_start[inst->src[i].nr - UBO_START] + > + inst->src[i].offset / 4; > + } else if (uniform_nr >= 0 && uniform_nr < (int) uniforms) { > constant_nr = push_constant_loc[uniform_nr]; > } else { > /* Section 5.11 of the OpenGL 4.1 spec says: > @@ -2069,6 +2075,20 @@ fs_visitor::assign_constant_locations() > stage_prog_data->nr_params = num_push_constants; > stage_prog_data->nr_pull_params = num_pull_constants; > > + /* Now that we know how many regular uniforms we'll push, reduce the > + * UBO push ranges so we don't exceed the 3DSTATE_CONSTANT limits. 
> + */ > + unsigned push_length = DIV_ROUND_UP(stage_prog_data->nr_params, 8); > + for (int i = 0; i < 4; i++) { > + struct brw_ubo_range *range = &prog_data->ubo_ranges[i]; > + > + if (push_length + range->length > 64) > + range->length = 64 - push_length; > + > + push_length += range->length; > + } > + assert(push_length <= 64); > + > /* Up until now, the param[] array has been indexed by reg + offset > * of UNIFORM registers. Move pull constants into pull_param[] and > * condense param[] to only contain the uniforms we chose to push. > @@ -2103,6 +2123,19 @@ fs_visitor::get_pull_locs(const fs_reg &src, > { > assert(src.file == UNIFORM); > > + if (src.nr >= UBO_START) { > + const struct brw_ubo_range *range = > + &prog_data->ubo_ranges[src.nr - UBO_START]; > + > + /* If this access is in our (reduced) range, use the push data. */ > + if (src.offset / 32 < range->length && !getenv("PULL"))
If the PULL environment variable is useful, how about making it part of INTEL_DEBUG instead of calling getenv() directly? _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev