Use gather push constants for UBO loads when the const block and offset are immediate values. Otherwise, fall back to the previous method of uploading the UBO constant data to the GRF using pull constants.
Signed-off-by: Abdiel Janulgue <abdiel.janul...@linux.intel.com> --- src/mesa/drivers/dri/i965/brw_vec4.cpp | 13 ++++ src/mesa/drivers/dri/i965/brw_vec4.h | 2 + src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 2 + src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 75 +++++++++++++++++++++++ 4 files changed, 92 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index f2b03f8..549fcd3 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -604,6 +604,18 @@ vec4_visitor::generate_gather_table() stage_prog_data->gather_table[p].reg = -1; stage_prog_data->gather_table[p].channel_mask = 0xf; } + + for (unsigned i = 0; i < this->nr_ubo_gather_table; i++) { + int p = stage_prog_data->nr_gather_table++; + stage_prog_data->gather_table[p].reg = this->ubo_gather_table[i].reg; + stage_prog_data->gather_table[p].channel_mask = this->ubo_gather_table[i].channel_mask; + stage_prog_data->gather_table[p].const_block = this->ubo_gather_table[i].const_block; + stage_prog_data->gather_table[p].const_offset = this->ubo_gather_table[i].const_offset; + stage_prog_data->max_ubo_const_block = MAX2(stage_prog_data->max_ubo_const_block, + this->ubo_gather_table[i].const_block); + } + + stage_prog_data->nr_ubo_params = ubo_uniforms; } /** @@ -1991,6 +2003,7 @@ brw_vs_emit(struct brw_context *brw, vp, prog, brw_select_clip_planes(&brw->ctx), mem_ctx, st_index, !_mesa_is_gles3(&brw->ctx)); + v.use_gather_constants = brw->vs_ubo_gather && brw->use_resource_streamer; if (!v.run()) { if (prog) { prog->LinkStatus = false; diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 534f1b1..0888ec7 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -163,6 +163,7 @@ public: int *uniform_vector_size; int uniform_array_size; /*< Size of uniform_[vector_]size arrays */ int uniforms; + int ubo_uniforms; src_reg 
shader_start_time; @@ -403,6 +404,7 @@ public: void dump_instruction(backend_instruction *inst, FILE *file); void visit_atomic_counter_intrinsic(ir_call *ir); + bool generate_ubo_gather_table(ir_expression *ir, const dst_reg &result_dst); bool is_high_sampler(src_reg sampler); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index b9694f6..5a85a21 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -679,6 +679,8 @@ brw_gs_emit(struct brw_context *brw, vec4_gs_visitor v(brw->intelScreen->compiler, brw, c, prog, mem_ctx, true /* no_spills */, st_index); + v.use_gather_constants = brw->gs_ubo_gather && + brw->use_resource_streamer; if (v.run()) { return generate_assembly(brw, prog, &c->gp->program.Base, &c->prog_data.base, mem_ctx, v.cfg, diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index f6e59ce..4bba4a5 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1828,6 +1828,12 @@ vec4_visitor::visit(ir_expression *ir) break; case ir_binop_ubo_load: { + /* Use gather push constants if at all possible, otherwise just + * fall back to pull constants for UBOs + */ + if (generate_ubo_gather_table(ir, result_dst)) + break; + ir_constant *const_uniform_block = ir->operands[0]->as_constant(); ir_constant *const_offset_ir = ir->operands[1]->as_constant(); unsigned const_offset = const_offset_ir ? const_offset_ir->value.u[0] : 0; @@ -3688,6 +3694,67 @@ vec4_visitor::resolve_bool_comparison(ir_rvalue *rvalue, src_reg *reg) *reg = neg_result; } +bool +vec4_visitor::generate_ubo_gather_table(ir_expression *ir, const dst_reg &result_dst) +{ + ir_constant *const_uniform_block = ir->operands[0]->as_constant(); + ir_constant *const_offset_ir = ir->operands[1]->as_constant(); + unsigned const_offset = const_offset_ir ? 
const_offset_ir->value.u[0] : 0; + + if (ir->operation != ir_binop_ubo_load || + !use_gather_constants || + !const_uniform_block || + !const_offset_ir) + return false; + + /* Only allow 32 registers (256 uniform components) as push constants, + */ + int max_uniform_components = 32 * 8; + int param_index = uniforms + ubo_uniforms; + if ((param_index + ir->type->vector_elements) >= max_uniform_components) + return false; + + dst_reg reg; + for (int i = 0; i < (int) this->nr_ubo_gather_table; i++) { + if ((this->ubo_gather_table[i].const_block == + const_uniform_block->value.u[0]) && + (this->ubo_gather_table[i].const_offset == + const_offset)) { + reg = dst_reg(UNIFORM, this->ubo_gather_table[i].reg); + break; + } + } + + if (reg.file != UNIFORM) { + reg = dst_reg(UNIFORM, param_index); + uniform_vector_size[param_index] = ir->type->vector_elements; + + int gather = this->nr_ubo_gather_table++; + this->ubo_gather_table[gather].reg = reg.reg; + this->ubo_gather_table[gather].const_block = + const_uniform_block->value.u[0]; + this->ubo_gather_table[gather].const_offset = const_offset; + + for (int i = 0; i < ir->type->vector_elements; i++) { + this->ubo_gather_table[gather].channel_mask |= (1 << i); + } + this->ubo_gather_table[gather].channel_mask <<= (const_offset % 16) / 4; + this->ubo_uniforms += ir->type->vector_elements; + } + reg.type = brw_type_for_base_type(ir->type); + + src_reg consts = src_reg(reg); + consts.swizzle = brw_swizzle_for_size(ir->type->vector_elements); + + if (ir->type->base_type == GLSL_TYPE_BOOL) { + emit(CMP(result_dst, consts, src_reg(0u), BRW_CONDITIONAL_NZ)); + } else { + this->result = consts; + } + + return true; +} + vec4_visitor::vec4_visitor(const struct brw_compiler *compiler, void *log_data, struct gl_program *prog, @@ -3727,6 +3794,7 @@ vec4_visitor::vec4_visitor(const struct brw_compiler *compiler, this->max_grf = devinfo->gen >= 7 ? 
GEN7_MRF_HACK_START : BRW_MAX_GRF; this->uniforms = 0; + this->ubo_uniforms = 0; /* Initialize uniform_array_size to at least 1 because pre-gen6 VS requires * at least one. See setup_uniforms() in brw_vec4.cpp. @@ -3737,8 +3805,15 @@ vec4_visitor::vec4_visitor(const struct brw_compiler *compiler, MAX2(DIV_ROUND_UP(stage_prog_data->nr_params, 4), 1); } + /* Gather constants hardware treats each fetch in 16-byte units + * So reflect size of each UBO fetch as vectors even if they contain + * less than 4 components + */ + this->uniform_array_size += stage_prog_data->nr_ubo_params; this->uniform_size = rzalloc_array(mem_ctx, int, this->uniform_array_size); this->uniform_vector_size = rzalloc_array(mem_ctx, int, this->uniform_array_size); + this->ubo_gather_table = rzalloc_array(mem_ctx, backend_shader::gather_table, + this->uniform_array_size); } vec4_visitor::~vec4_visitor() -- 1.9.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev