Use gather push constants for UBO loads when the const block and offset
are immediate values. Otherwise, just fall back to the previous method of
uploading the UBO constant data to the GRF using pull constants.

Signed-off-by: Abdiel Janulgue <abdiel.janul...@linux.intel.com>
---
 src/mesa/drivers/dri/i965/brw_vec4.cpp            | 13 ++++
 src/mesa/drivers/dri/i965/brw_vec4.h              |  2 +
 src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp |  2 +
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp    | 75 +++++++++++++++++++++++
 4 files changed, 92 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index f2b03f8..549fcd3 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -604,6 +604,18 @@ vec4_visitor::generate_gather_table()
       stage_prog_data->gather_table[p].reg = -1;
       stage_prog_data->gather_table[p].channel_mask = 0xf;
    }
+
+   for (unsigned i = 0; i < this->nr_ubo_gather_table; i++) {
+      int p = stage_prog_data->nr_gather_table++;
+      stage_prog_data->gather_table[p].reg = this->ubo_gather_table[i].reg;
+      stage_prog_data->gather_table[p].channel_mask = 
this->ubo_gather_table[i].channel_mask;
+      stage_prog_data->gather_table[p].const_block = 
this->ubo_gather_table[i].const_block;
+      stage_prog_data->gather_table[p].const_offset = 
this->ubo_gather_table[i].const_offset;
+      stage_prog_data->max_ubo_const_block = 
MAX2(stage_prog_data->max_ubo_const_block,
+                                                  
this->ubo_gather_table[i].const_block);
+   }
+
+   stage_prog_data->nr_ubo_params = ubo_uniforms;
 }
 
 /**
@@ -1991,6 +2003,7 @@ brw_vs_emit(struct brw_context *brw,
                         vp, prog, brw_select_clip_planes(&brw->ctx),
                         mem_ctx, st_index,
                         !_mesa_is_gles3(&brw->ctx));
+      v.use_gather_constants = brw->vs_ubo_gather && 
brw->use_resource_streamer;
       if (!v.run()) {
          if (prog) {
             prog->LinkStatus = false;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index 534f1b1..0888ec7 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -163,6 +163,7 @@ public:
    int *uniform_vector_size;
    int uniform_array_size; /*< Size of uniform_[vector_]size arrays */
    int uniforms;
+   int ubo_uniforms;
 
    src_reg shader_start_time;
 
@@ -403,6 +404,7 @@ public:
    void dump_instruction(backend_instruction *inst, FILE *file);
 
    void visit_atomic_counter_intrinsic(ir_call *ir);
+   bool generate_ubo_gather_table(ir_expression *ir, const dst_reg 
&result_dst);
 
    bool is_high_sampler(src_reg sampler);
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
index b9694f6..5a85a21 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -679,6 +679,8 @@ brw_gs_emit(struct brw_context *brw,
 
          vec4_gs_visitor v(brw->intelScreen->compiler, brw,
                            c, prog, mem_ctx, true /* no_spills */, st_index);
+         v.use_gather_constants = brw->gs_ubo_gather &&
+            brw->use_resource_streamer;
          if (v.run()) {
             return generate_assembly(brw, prog, &c->gp->program.Base,
                                      &c->prog_data.base, mem_ctx, v.cfg,
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index f6e59ce..4bba4a5 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1828,6 +1828,12 @@ vec4_visitor::visit(ir_expression *ir)
       break;
 
    case ir_binop_ubo_load: {
+      /* Use gather push constants if at all possible, otherwise just
+       * fall back to pull constants for UBOs
+       */
+      if (generate_ubo_gather_table(ir, result_dst))
+         break;
+
       ir_constant *const_uniform_block = ir->operands[0]->as_constant();
       ir_constant *const_offset_ir = ir->operands[1]->as_constant();
       unsigned const_offset = const_offset_ir ? const_offset_ir->value.u[0] : 
0;
@@ -3688,6 +3694,67 @@ vec4_visitor::resolve_bool_comparison(ir_rvalue *rvalue, 
src_reg *reg)
    *reg = neg_result;
 }
 
+bool
+vec4_visitor::generate_ubo_gather_table(ir_expression *ir, const dst_reg 
&result_dst)
+{
+   ir_constant *const_uniform_block = ir->operands[0]->as_constant();
+   ir_constant *const_offset_ir = ir->operands[1]->as_constant();
+   unsigned const_offset = const_offset_ir ? const_offset_ir->value.u[0] : 0;
+
+   if (ir->operation != ir_binop_ubo_load ||
+       !use_gather_constants              ||
+       !const_uniform_block               ||
+       !const_offset_ir)
+      return false;
+
+   /* Only allow 32 registers (256 uniform components) as push constants,
+    */
+   int max_uniform_components = 32 * 8;
+   int param_index = uniforms + ubo_uniforms;
+   if ((param_index + ir->type->vector_elements) >= max_uniform_components)
+      return false;
+
+   dst_reg reg;
+   for (int i = 0; i < (int) this->nr_ubo_gather_table; i++) {
+      if ((this->ubo_gather_table[i].const_block ==
+           const_uniform_block->value.u[0]) &&
+          (this->ubo_gather_table[i].const_offset ==
+           const_offset)) {
+         reg = dst_reg(UNIFORM, this->ubo_gather_table[i].reg);
+         break;
+      }
+   }
+
+   if (reg.file != UNIFORM) {
+      reg = dst_reg(UNIFORM, param_index);
+      uniform_vector_size[param_index] = ir->type->vector_elements;
+
+      int gather = this->nr_ubo_gather_table++;
+      this->ubo_gather_table[gather].reg = reg.reg;
+      this->ubo_gather_table[gather].const_block =
+         const_uniform_block->value.u[0];
+      this->ubo_gather_table[gather].const_offset = const_offset;
+
+      for (int i = 0; i < ir->type->vector_elements; i++) {
+         this->ubo_gather_table[gather].channel_mask |= (1 << i);
+      }
+      this->ubo_gather_table[gather].channel_mask <<= (const_offset % 16) / 4;
+      this->ubo_uniforms += ir->type->vector_elements;
+   }
+   reg.type = brw_type_for_base_type(ir->type);
+
+   src_reg consts = src_reg(reg);
+   consts.swizzle = brw_swizzle_for_size(ir->type->vector_elements);
+
+   if (ir->type->base_type == GLSL_TYPE_BOOL) {
+      emit(CMP(result_dst, consts, src_reg(0u), BRW_CONDITIONAL_NZ));
+   } else {
+      this->result = consts;
+   }
+
+   return true;
+}
+
 vec4_visitor::vec4_visitor(const struct brw_compiler *compiler,
                            void *log_data,
                            struct gl_program *prog,
@@ -3727,6 +3794,7 @@ vec4_visitor::vec4_visitor(const struct brw_compiler 
*compiler,
    this->max_grf = devinfo->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
 
    this->uniforms = 0;
+   this->ubo_uniforms = 0;
 
    /* Initialize uniform_array_size to at least 1 because pre-gen6 VS requires
     * at least one. See setup_uniforms() in brw_vec4.cpp.
@@ -3737,8 +3805,15 @@ vec4_visitor::vec4_visitor(const struct brw_compiler 
*compiler,
          MAX2(DIV_ROUND_UP(stage_prog_data->nr_params, 4), 1);
    }
 
+   /* Gather constants hardware treats each fetch in 16-byte units
+    * So reflect size of each UBO fetch as vectors even if they contain
+    * less than 4 components
+    */
+   this->uniform_array_size += stage_prog_data->nr_ubo_params;
    this->uniform_size = rzalloc_array(mem_ctx, int, this->uniform_array_size);
    this->uniform_vector_size = rzalloc_array(mem_ctx, int, 
this->uniform_array_size);
+   this->ubo_gather_table = rzalloc_array(mem_ctx, 
backend_shader::gather_table,
+                                          this->uniform_array_size);
 }
 
 vec4_visitor::~vec4_visitor()
-- 
1.9.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to