On Tue, Apr 28, 2015 at 11:08:15PM +0300, Abdiel Janulgue wrote: > When the const block and offset are immediate values. Otherwise just > fall-back to the previous method of uploading the UBO constant data to > GRF using pull constants. > > Signed-off-by: Abdiel Janulgue <abdiel.janul...@linux.intel.com> > --- > src/mesa/drivers/dri/i965/brw_fs.cpp | 11 ++++ > src/mesa/drivers/dri/i965/brw_fs.h | 4 ++ > src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 86 > +++++++++++++++++++++++++++- > 3 files changed, 100 insertions(+), 1 deletion(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp > b/src/mesa/drivers/dri/i965/brw_fs.cpp > index 071ac59..031d807 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp > @@ -2273,6 +2273,7 @@ fs_visitor::assign_constant_locations() > } > > stage_prog_data->nr_params = 0; > + stage_prog_data->nr_ubo_params = ubo_uniforms; > > unsigned const_reg_access[uniforms]; > memset(const_reg_access, 0, sizeof(const_reg_access)); > @@ -2302,6 +2303,16 @@ fs_visitor::assign_constant_locations() > stage_prog_data->gather_table[p].channel_mask = > const_reg_access[i]; > } > + > + for (unsigned i = 0; i < this->nr_ubo_gather_table; i++) { > + int p = stage_prog_data->nr_gather_table++; > + stage_prog_data->gather_table[p].reg = this->ubo_gather_table[i].reg; > + stage_prog_data->gather_table[p].channel_mask = > this->ubo_gather_table[i].channel_mask; > + stage_prog_data->gather_table[p].const_block = > this->ubo_gather_table[i].const_block; > + stage_prog_data->gather_table[p].const_offset = > this->ubo_gather_table[i].const_offset; > + stage_prog_data->max_ubo_const_block = > MAX2(stage_prog_data->max_ubo_const_block, > + > this->ubo_gather_table[i].const_block);
These lines all overflow 80 columns. > + } > } > > /** > diff --git a/src/mesa/drivers/dri/i965/brw_fs.h > b/src/mesa/drivers/dri/i965/brw_fs.h > index 32063f0..a48b2bb 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs.h > +++ b/src/mesa/drivers/dri/i965/brw_fs.h > @@ -417,6 +417,7 @@ public: > void setup_uniform_values(ir_variable *ir); > void setup_builtin_uniform_values(ir_variable *ir); > int implied_mrf_writes(fs_inst *inst); > + bool generate_ubo_gather_table(ir_expression* ir); > > virtual void dump_instructions(); > virtual void dump_instructions(const char *name); > @@ -445,6 +446,9 @@ public: > /** Total number of direct uniforms we can get from NIR */ > unsigned num_direct_uniforms; > > + /** Number of ubo uniform variable components visited. */ > + unsigned ubo_uniforms; > + > /** Byte-offset for the next available spot in the scratch space buffer. > */ > unsigned last_scratch; > > diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > index 4e99366..11e608b 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > @@ -1179,11 +1179,18 @@ fs_visitor::visit(ir_expression *ir) > emit(FS_OPCODE_PACK_HALF_2x16_SPLIT, this->result, op[0], op[1]); > break; > case ir_binop_ubo_load: { > + /* Use gather push constants if at all possible, otherwise just > + * fall back to pull constants for UBOs > + */ > + if (generate_ubo_gather_table(ir)) > + break; > + > /* This IR node takes a constant uniform block and a constant or > * variable byte offset within the block and loads a vector from that. > */ > ir_constant *const_uniform_block = ir->operands[0]->as_constant(); > ir_constant *const_offset = ir->operands[1]->as_constant(); > + This added blank line is not part of this patch.
> fs_reg surf_index; > > if (const_uniform_block) { > @@ -4144,6 +4151,79 @@ fs_visitor::resolve_bool_comparison(ir_rvalue *rvalue, > fs_reg *reg) > *reg = neg_result; > } > > +bool > +fs_visitor::generate_ubo_gather_table(ir_expression *ir) > +{ > + ir_constant *const_uniform_block = ir->operands[0]->as_constant(); > + ir_constant *const_offset = ir->operands[1]->as_constant(); These are only used for reading, so let's use constant pointers. > + > + if (ir->operation != ir_binop_ubo_load || > + !brw->has_resource_streamer || > + !brw->fs_ubo_gather || > + !const_uniform_block || This is not really the style used elsewhere; don't align the "||" operators. > + !const_offset) > + return false; > + > + /* Only allow 16 registers (128 uniform components) as push constants. > + */ Move the closing of the comment up to the previous line. > + unsigned int max_push_components = 16 * 8; > + unsigned param_index = uniforms + ubo_uniforms; These could both be declared as const. > + if ((param_index + ir->type->vector_elements) >= max_push_components) > + return false; > + > + fs_reg reg; > + if (dispatch_width == 16) { > + for (int i = 0; i < (int) this->nr_ubo_gather_table; i++) { > + if ((this->ubo_gather_table[i].const_block == > + const_uniform_block->value.u[0]) && > + (this->ubo_gather_table[i].const_offset == > + const_offset->value.u[0])) { > + reg = fs_reg(UNIFORM, this->ubo_gather_table[i].reg); > + reg.type = brw_type_for_base_type(ir->type); > + break; > + } > + } > + assert(reg.file == UNIFORM); > + } > + > + if (reg.file != UNIFORM) { > + reg = fs_reg(UNIFORM, param_index); > + int gather = this->nr_ubo_gather_table++; > + > + assert(ir->type->vector_elements <= 4); > + ubo_uniforms += ir->type->vector_elements; > + this->ubo_gather_table[gather].reg = reg.reg; > + this->ubo_gather_table[gather].const_block = > + const_uniform_block->value.u[0]; > + this->ubo_gather_table[gather].const_offset = > + const_offset->value.u[0]; > + reg.type = brw_type_for_base_type(ir->type); > + } > + > + if 
(ir->type->base_type == GLSL_TYPE_BOOL) { > + Extra blank line. > + for (int i = 0; i < ir->type->vector_elements; i++) { > + Extra blank line here as well. > + /* The std140 packing rules don't allow vectors to cross 16-byte > + * boundaries, and a reg is 32 bytes. > + */ > + assert(reg.subreg_offset < 32); > + > + /* UBO bools are any nonzero value. We consider bools to be > + * values with the low bit set to 1. Convert them using CMP. > + */ > + emit(CMP(result, reg, fs_reg(0u), BRW_CONDITIONAL_NZ)); > + > + result = offset(result, 1); > + } > + result.reg_offset = 0; > + } else { > + result = reg; > + } > + > + return true; > +} > + > fs_visitor::fs_visitor(struct brw_context *brw, > void *mem_ctx, > const struct brw_wm_prog_key *key, > @@ -4224,6 +4304,7 @@ fs_visitor::init() > this->regs_live_at_ip = NULL; > > this->uniforms = 0; > + this->ubo_uniforms = 0; > this->last_scratch = 0; > this->pull_constant_loc = NULL; > this->push_constant_loc = NULL; > @@ -4231,8 +4312,11 @@ fs_visitor::init() > this->spilled_any_registers = false; > this->do_dual_src = false; > > - if (dispatch_width == 8) > + if (dispatch_width == 8) { > this->param_size = rzalloc_array(mem_ctx, int, > stage_prog_data->nr_params); > + this->ubo_gather_table = rzalloc_array(mem_ctx, > backend_visitor::gather_table, > + stage_prog_data->nr_params); > + } > } > > fs_visitor::~fs_visitor() > -- > 1.9.1 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev