vs: Map scalar VS input locations properly; avoid tons of MOVs.

Kenneth Graunke Mon, 24 Aug 2015 17:20:44 -0700

On Monday, August 17, 2015 09:48:13 PM Jason Ekstrand wrote:
> On Aug 17, 2015 4:08 PM, "Kenneth Graunke" <kenn...@whitecape.org> wrote:
> >
> > Previously, we used nir_lower_io with the scalar type_size function,
> > which mapped VERT_ATTRIB_* locations to...some numbers.  Then, in
> > fs_visitor::nir_setup_inputs(), we created temporaries indexed by
> > those numbers, and emitted MOVs from the actual ATTR registers to
> > those temporaries.
> >
> > This patch reworks our input lowering to produce NIR lower_input
> > intrinsics that properly index into the ATTR file, so we can access
> > it directly.
> >
> > No changes in shader-db.
> >
> > Signed-off-by: Kenneth Graunke <kenn...@whitecape.org>
> > ---
> >  src/mesa/drivers/dri/i965/brw_fs.cpp     |  2 +-
> >  src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 71
> +++++++++-----------------------
> >  src/mesa/drivers/dri/i965/brw_nir.c      | 25 ++++++++++-
> >  3 files changed, 45 insertions(+), 53 deletions(-)
> >
> > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp
> b/src/mesa/drivers/dri/i965/brw_fs.cpp
> > index 93adbc6..efabb52 100644
> > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> > @@ -4527,7 +4527,7 @@ fs_visitor::dump_instruction(backend_instruction
> *be_inst, FILE *file)
> >           fprintf(file, "***m%d***", inst->src[i].reg);
> >           break;
> >        case ATTR:
> > -         fprintf(file, "attr%d", inst->src[i].reg +
> inst->src[i].reg_offset);
> > +         fprintf(file, "attr%d+%d", inst->src[i].reg,
> inst->src[i].reg_offset);
> >           break;
> >        case UNIFORM:
> >           fprintf(file, "u%d", inst->src[i].reg +
> inst->src[i].reg_offset);
> > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> > index 51ba23b..397cdcb 100644
> > --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> > +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> > @@ -56,61 +56,25 @@ fs_visitor::emit_nir_code()
> >  void
> >  fs_visitor::nir_setup_inputs(nir_shader *shader)
> >  {
> > +   if (stage != MESA_SHADER_FRAGMENT)
> > +      return;
> > +
> >     nir_inputs = bld.vgrf(BRW_REGISTER_TYPE_F, shader->num_inputs);
> >
> >     foreach_list_typed(nir_variable, var, node, &shader->inputs) {
> > -      enum brw_reg_type type = brw_type_for_base_type(var->type);
> >        fs_reg input = offset(nir_inputs, bld, var->data.driver_location);
> >
> >        fs_reg reg;
> > -      switch (stage) {
> > -      case MESA_SHADER_VERTEX: {
> > -         /* Our ATTR file is indexed by VERT_ATTRIB_*, which is the value
> > -          * stored in nir_variable::location.
> > -          *
> > -          * However, NIR's load_input intrinsics use a different index -
> an
> > -          * offset into a single contiguous array containing all inputs.
> > -          * This index corresponds to the nir_variable::driver_location
> field.
> > -          *
> > -          * So, we need to copy from fs_reg(ATTR, var->location) to
> > -          * offset(nir_inputs, var->data.driver_location).
> > -          */
> > -         const glsl_type *const t = var->type->without_array();
> > -         const unsigned components = t->components();
> > -         const unsigned cols = t->matrix_columns;
> > -         const unsigned elts = t->vector_elements;
> > -         unsigned array_length = var->type->is_array() ?
> var->type->length : 1;
> > -         for (unsigned i = 0; i < array_length; i++) {
> > -            for (unsigned j = 0; j < cols; j++) {
> > -               for (unsigned k = 0; k < elts; k++) {
> > -                  bld.MOV(offset(retype(input, type), bld,
> > -                                 components * i + elts * j + k),
> > -                          offset(fs_reg(ATTR, var->data.location + i,
> type),
> > -                                 bld, 4 * j + k));
> > -               }
> > -            }
> > -         }
> > -         break;
> > -      }
> > -      case MESA_SHADER_GEOMETRY:
> > -      case MESA_SHADER_COMPUTE:
> > -      case MESA_SHADER_TESS_CTRL:
> > -      case MESA_SHADER_TESS_EVAL:
> > -         unreachable("fs_visitor not used for these stages yet.");
> > -         break;
> > -      case MESA_SHADER_FRAGMENT:
> > -         if (var->data.location == VARYING_SLOT_POS) {
> > -            reg =
> *emit_fragcoord_interpolation(var->data.pixel_center_integer,
> > -
> var->data.origin_upper_left);
> > -            emit_percomp(bld, fs_inst(BRW_OPCODE_MOV,
> bld.dispatch_width(),
> > -                                      input, reg), 0xF);
> > -         } else {
> > -            emit_general_interpolation(input, var->name, var->type,
> > -                                       (glsl_interp_qualifier)
> var->data.interpolation,
> > -                                       var->data.location,
> var->data.centroid,
> > -                                       var->data.sample);
> > -         }
> > -         break;
> > +      if (var->data.location == VARYING_SLOT_POS) {
> > +         reg =
> *emit_fragcoord_interpolation(var->data.pixel_center_integer,
> > +
>  var->data.origin_upper_left);
> > +         emit_percomp(bld, fs_inst(BRW_OPCODE_MOV, bld.dispatch_width(),
> > +                                   input, reg), 0xF);
> > +      } else {
> > +         emit_general_interpolation(input, var->name, var->type,
> > +                                    (glsl_interp_qualifier)
> var->data.interpolation,
> > +                                    var->data.location,
> var->data.centroid,
> > +                                    var->data.sample);
> >        }
> >     }
> >  }
> > @@ -1557,8 +1521,13 @@ fs_visitor::nir_emit_intrinsic(const fs_builder
> &bld, nir_intrinsic_instr *instr
> >     case nir_intrinsic_load_input: {
> >        unsigned index = 0;
> >        for (unsigned j = 0; j < instr->num_components; j++) {
> > -         fs_reg src = offset(retype(nir_inputs, dest.type), bld,
> > -                             instr->const_index[0] + index);
> > +         fs_reg src;
> > +         if (stage == MESA_SHADER_VERTEX) {
> > +            src = offset(fs_reg(ATTR, instr->const_index[0], dest.type),
> bld, index);
> > +         } else {
> > +            src = offset(retype(nir_inputs, dest.type), bld,
> > +                         instr->const_index[0] + index);
> > +         }
> >           if (has_indirect)
> >              src.reladdr = new(mem_ctx)
> fs_reg(get_nir_src(instr->src[0]));
> >           index++;
> > diff --git a/src/mesa/drivers/dri/i965/brw_nir.c
> b/src/mesa/drivers/dri/i965/brw_nir.c
> > index 224c207..2e2d02b 100644
> > --- a/src/mesa/drivers/dri/i965/brw_nir.c
> > +++ b/src/mesa/drivers/dri/i965/brw_nir.c
> > @@ -28,14 +28,37 @@
> >  #include "program/prog_to_nir.h"
> >
> >  static void
> > +lower_scalar_vs_inputs(nir_shader *nir)
> > +{
> > +   /* Start with the location of the variable's base. */
> > +   foreach_list_typed(nir_variable, var, node, &nir->inputs) {
> > +      var->data.driver_location = var->data.location;
> > +   }
> > +
> > +   /* Now use nir_lower_io to walk dereference chains.  Attribute arrays
> > +    * are loaded as one vec4 per element (or matrix column), so we use
> > +    * type_size_vec4 here.
> > +    */
> > +   nir_lower_io(nir, nir_var_shader_in, type_size_vec4);
> > +}
> 
> I think I'd rather leave this inline and not split it out. It's easier to
> see the interactions that way.


Sure, I can inline it if you prefer.  I think I originally expected
there to be more code here, but the bulk of it ended up in a
nir_foreach_block callback, so it's not too large.

> 
> > +
> > +static void
> >  brw_lower_nir_io_scalar(nir_shader *nir, gl_shader_stage stage)
> >  {
> >     nir_assign_var_locations_direct_first(nir, &nir->uniforms,
> >                                           &nir->num_direct_uniforms,
> >                                           &nir->num_uniforms,
> >                                           type_size_scalar);
> > -   nir_assign_var_locations(&nir->inputs, &nir->num_inputs,
> type_size_scalar);
> >     nir_assign_var_locations(&nir->outputs, &nir->num_outputs,
> type_size_scalar);
> > +
> > +   switch (stage) {
> > +   case MESA_SHADER_VERTEX:
> > +      lower_scalar_vs_inputs(nir);
> > +      break;
> > +   default:
> > +      nir_assign_var_locations(&nir->inputs, &nir->num_inputs,
> type_size_scalar);
> 
> Again, is "regular" lowering really the right default here?  It seems
> FS-specific...

Well...given that we only support two stages right now...it's hard to
say.  It's what we were doing before.

I guess I could try rebasing my SIMD8 GS patches on this...

signature.asc
Description: This is a digitally signed message part.

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 7/8] i965/vs: Map scalar VS input locations properly; avoid tons of MOVs.

Reply via email to