Re: [Mesa-dev] [PATCH v2 08/13] i965: Handle TCS outputs and TES inputs.

2015-12-14 Thread Kenneth Graunke
On Saturday, December 12, 2015 10:21:19 PM Jordan Justen wrote:
> On 2015-12-11 13:23:57, Kenneth Graunke wrote:
> > TCS outputs and TES inputs both refer to a common "patch URB entry"
> > shared across all invocations.  First, there are a number of per-patch
> > entries.  Then, there are per-vertex entries, accessed via a per-variable
> > offset plus a per-vertex stride times the vertex index.
> > 
> > Because these calculations need to be done in both the vec4 and scalar
> > backends, it's simpler to just do the offset calculations in NIR.
> > It doesn't necessarily make much sense to use per-vertex intrinsics
> > afterwards, but that at least means we don't lose the per-patch vs.
> > per-vertex information.
> > 
> > Signed-off-by: Kenneth Graunke 
> > ---
> >  src/mesa/drivers/dri/i965/brw_nir.c | 122 +++-
> >  1 file changed, 120 insertions(+), 2 deletions(-)
> > 
> > diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
> > index 9cf4944..e46e177 100644
> > --- a/src/mesa/drivers/dri/i965/brw_nir.c
> > +++ b/src/mesa/drivers/dri/i965/brw_nir.c
> > @@ -133,6 +133,69 @@ remap_inputs_with_vue_map(nir_block *block, void *closure)
> > return true;
> >  }
> >  
> > +struct remap_patch_urb_offsets_state {
> > +   nir_builder b;
> > +   struct brw_vue_map vue_map;
> > +};
> > +
> > +static bool
> > +remap_patch_urb_offsets(nir_block *block, void *closure)
> > +{
> > +   struct remap_patch_urb_offsets_state *state = closure;
> > +
> > +   nir_foreach_instr_safe(block, instr) {
> > +  if (instr->type != nir_instr_type_intrinsic)
> > + continue;
> > +
> > +  nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
> > +
> > +  bool is_input =
> > + intrin->intrinsic == nir_intrinsic_load_input ||
> > + intrin->intrinsic == nir_intrinsic_load_per_vertex_input;
> > +
> > +  bool is_output =
> > + intrin->intrinsic == nir_intrinsic_load_output ||
> > + intrin->intrinsic == nir_intrinsic_load_per_vertex_output ||
> > + intrin->intrinsic == nir_intrinsic_store_output ||
> > + intrin->intrinsic == nir_intrinsic_store_per_vertex_output;
> 
> Can you call the functions you added previously? (i965: Separate base
> offset/constant offset combining from remapping.)
> 
> 7 & 8 Reviewed-by: Jordan Justen 

Oh, good call - I wrote this patch first, even though it came later in
the series.  That makes a ton of sense.  I've changed it to:

+  gl_shader_stage stage = state->b.shader->stage;
+
+  if ((stage == MESA_SHADER_TESS_CTRL && is_output(intrin)) ||
+  (stage == MESA_SHADER_TESS_EVAL && is_input(intrin))) {
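
(For reference: is_input()/is_output() are just the intrinsic checks from the
quoted hunk, factored into helpers by the earlier patch -- presumably something
along these lines; a sketch, not copied from that patch, and it would live in
brw_nir.c where the NIR types are already in scope:)

static bool
is_input(nir_intrinsic_instr *intrin)
{
   return intrin->intrinsic == nir_intrinsic_load_input ||
          intrin->intrinsic == nir_intrinsic_load_per_vertex_input;
}

static bool
is_output(nir_intrinsic_instr *intrin)
{
   return intrin->intrinsic == nir_intrinsic_load_output ||
          intrin->intrinsic == nir_intrinsic_load_per_vertex_output ||
          intrin->intrinsic == nir_intrinsic_store_output ||
          intrin->intrinsic == nir_intrinsic_store_per_vertex_output;
}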




Re: [Mesa-dev] [PATCH v2 08/13] i965: Handle TCS outputs and TES inputs.

2015-12-12 Thread Jordan Justen
On 2015-12-11 13:23:57, Kenneth Graunke wrote:
> TCS outputs and TES inputs both refer to a common "patch URB entry"
> shared across all invocations.  First, there are a number of per-patch
> entries.  Then, there are per-vertex entries, accessed via a per-variable
> offset plus a per-vertex stride times the vertex index.
> 
> Because these calculations need to be done in both the vec4 and scalar
> backends, it's simpler to just do the offset calculations in NIR.
> It doesn't necessarily make much sense to use per-vertex intrinsics
> afterwards, but that at least means we don't lose the per-patch vs.
> per-vertex information.
> 
> Signed-off-by: Kenneth Graunke 
> ---
>  src/mesa/drivers/dri/i965/brw_nir.c | 122 +++-
>  1 file changed, 120 insertions(+), 2 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
> index 9cf4944..e46e177 100644
> --- a/src/mesa/drivers/dri/i965/brw_nir.c
> +++ b/src/mesa/drivers/dri/i965/brw_nir.c
> @@ -133,6 +133,69 @@ remap_inputs_with_vue_map(nir_block *block, void *closure)
> return true;
>  }
>  
> +struct remap_patch_urb_offsets_state {
> +   nir_builder b;
> +   struct brw_vue_map vue_map;
> +};
> +
> +static bool
> +remap_patch_urb_offsets(nir_block *block, void *closure)
> +{
> +   struct remap_patch_urb_offsets_state *state = closure;
> +
> +   nir_foreach_instr_safe(block, instr) {
> +  if (instr->type != nir_instr_type_intrinsic)
> + continue;
> +
> +  nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
> +
> +  bool is_input =
> + intrin->intrinsic == nir_intrinsic_load_input ||
> + intrin->intrinsic == nir_intrinsic_load_per_vertex_input;
> +
> +  bool is_output =
> + intrin->intrinsic == nir_intrinsic_load_output ||
> + intrin->intrinsic == nir_intrinsic_load_per_vertex_output ||
> + intrin->intrinsic == nir_intrinsic_store_output ||
> + intrin->intrinsic == nir_intrinsic_store_per_vertex_output;

Can you call the functions you added previously? (i965: Separate base
offset/constant offset combining from remapping.)

7 & 8 Reviewed-by: Jordan Justen 

> +
> +  if ((state->b.shader->stage == MESA_SHADER_TESS_CTRL && is_output) ||
> +  (state->b.shader->stage == MESA_SHADER_TESS_EVAL && is_input)) {
> + int vue_slot = state->vue_map.varying_to_slot[intrin->const_index[0]];
> + assert(vue_slot != -1);
> + intrin->const_index[0] = vue_slot;
> +
> + nir_src *vertex = nir_get_io_vertex_index_src(intrin);
> + if (vertex) {
> +nir_const_value *const_vertex = nir_src_as_const_value(*vertex);
> +if (const_vertex) {
> +   intrin->const_index[0] += const_vertex->u[0] *
> + state->vue_map.num_per_vertex_slots;
> +} else {
> +   state->b.cursor = nir_before_instr(&intrin->instr);
> +
> +   /* Multiply by the number of per-vertex slots. */
> +   nir_ssa_def *vertex_offset =
> +  nir_imul(&state->b,
> +   nir_ssa_for_src(&state->b, *vertex, 1),
> +   nir_imm_int(&state->b,
> +   state->vue_map.num_per_vertex_slots));
> +
> +   /* Add it to the existing offset */
> +   nir_src *offset = nir_get_io_offset_src(intrin);
> +   nir_ssa_def *total_offset =
> +  nir_iadd(&state->b, vertex_offset,
> +   nir_ssa_for_src(&state->b, *offset, 1));
> +
> +   nir_instr_rewrite_src(&intrin->instr, offset,
> + nir_src_for_ssa(total_offset));
> +}
> + }
> +  }
> +   }
> +   return true;
> +}
> +
>  static void
>  brw_nir_lower_inputs(nir_shader *nir,
>   const struct brw_device_info *devinfo,
> @@ -223,6 +286,31 @@ brw_nir_lower_inputs(nir_shader *nir,
>}
>break;
> }
> +   case MESA_SHADER_TESS_EVAL: {
> +  struct remap_patch_urb_offsets_state state;
> +  brw_compute_tess_vue_map(&state.vue_map,
> +   nir->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID,
> +   nir->info.patch_inputs_read);
> +
> +  foreach_list_typed(nir_variable, var, node, &nir->inputs) {
> + var->data.driver_location = var->data.location;
> +  }
> +
> +  nir_lower_io(nir, nir_var_shader_in, type_size_vec4);
> +
> +  /* This pass needs actual constants */
> +  nir_opt_constant_folding(nir);
> +
> +  nir_foreach_overload(nir, overload) {
> + if (overload->impl) {
> +nir_builder_init(&state.b, overload->impl);
> +nir_foreach_block(overload->impl, add_const_offset_to_base, &state.b);
> +nir_builder_init(&state.b, overload->impl);
> +nir_foreach_block(overload->impl, remap_patch_urb_offsets, &state);

[Mesa-dev] [PATCH v2 08/13] i965: Handle TCS outputs and TES inputs.

2015-12-11 Thread Kenneth Graunke
TCS outputs and TES inputs both refer to a common "patch URB entry"
shared across all invocations.  First, there are a number of per-patch
entries.  Then, there are per-vertex entries, accessed via a per-variable
offset plus a per-vertex stride times the vertex index.

Because these calculations need to be done in both the vec4 and scalar
backends, it's simpler to just do the offset calculations in NIR.
It doesn't necessarily make much sense to use per-vertex intrinsics
afterwards, but that at least means we don't lose the per-patch vs.
per-vertex information.

Signed-off-by: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/brw_nir.c | 122 +++-
 1 file changed, 120 insertions(+), 2 deletions(-)
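
Concretely, the lookup boils down to "base slot + vertex index * per-vertex
stride" on top of the per-patch block.  A standalone, simplified sketch of
that math (fake_vue_map and patch_urb_slot are illustrative stand-ins, not
the driver's brw_vue_map API):

#include <stdio.h>

/* Simplified stand-in for the patch URB entry layout:
 * [per-patch slots][vertex 0 slots][vertex 1 slots]...
 */
struct fake_vue_map {
   int num_per_patch_slots;   /* patch header plus per-patch varyings */
   int num_per_vertex_slots;  /* stride between consecutive vertices  */
};

/* base_slot: slot of the varying within vertex 0's block (it already
 * accounts for the per-patch section); vertex: the vertex index. */
static int
patch_urb_slot(const struct fake_vue_map *m, int base_slot, int vertex)
{
   return base_slot + vertex * m->num_per_vertex_slots;
}

int
main(void)
{
   struct fake_vue_map m = { 2, 4 };
   /* First per-vertex varying, read from vertex 2: 2 + 0 + 2 * 4 = slot 10. */
   printf("%d\n", patch_urb_slot(&m, m.num_per_patch_slots + 0, 2));
   return 0;
}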

diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index 9cf4944..e46e177 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -133,6 +133,69 @@ remap_inputs_with_vue_map(nir_block *block, void *closure)
return true;
 }
 
+struct remap_patch_urb_offsets_state {
+   nir_builder b;
+   struct brw_vue_map vue_map;
+};
+
+static bool
+remap_patch_urb_offsets(nir_block *block, void *closure)
+{
+   struct remap_patch_urb_offsets_state *state = closure;
+
+   nir_foreach_instr_safe(block, instr) {
+  if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+  nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+  bool is_input =
+ intrin->intrinsic == nir_intrinsic_load_input ||
+ intrin->intrinsic == nir_intrinsic_load_per_vertex_input;
+
+  bool is_output =
+ intrin->intrinsic == nir_intrinsic_load_output ||
+ intrin->intrinsic == nir_intrinsic_load_per_vertex_output ||
+ intrin->intrinsic == nir_intrinsic_store_output ||
+ intrin->intrinsic == nir_intrinsic_store_per_vertex_output;
+
+  if ((state->b.shader->stage == MESA_SHADER_TESS_CTRL && is_output) ||
+  (state->b.shader->stage == MESA_SHADER_TESS_EVAL && is_input)) {
+ int vue_slot = state->vue_map.varying_to_slot[intrin->const_index[0]];
+ assert(vue_slot != -1);
+ intrin->const_index[0] = vue_slot;
+
+ nir_src *vertex = nir_get_io_vertex_index_src(intrin);
+ if (vertex) {
+nir_const_value *const_vertex = nir_src_as_const_value(*vertex);
+if (const_vertex) {
+   intrin->const_index[0] += const_vertex->u[0] *
+ state->vue_map.num_per_vertex_slots;
+} else {
+   state->b.cursor = nir_before_instr(&intrin->instr);
+
+   /* Multiply by the number of per-vertex slots. */
+   nir_ssa_def *vertex_offset =
+  nir_imul(&state->b,
+   nir_ssa_for_src(&state->b, *vertex, 1),
+   nir_imm_int(&state->b,
+   state->vue_map.num_per_vertex_slots));
+
+   /* Add it to the existing offset */
+   nir_src *offset = nir_get_io_offset_src(intrin);
+   nir_ssa_def *total_offset =
+  nir_iadd(&state->b, vertex_offset,
+   nir_ssa_for_src(&state->b, *offset, 1));
+
+   nir_instr_rewrite_src(&intrin->instr, offset,
+ nir_src_for_ssa(total_offset));
+}
+ }
+  }
+   }
+   return true;
+}
+
 static void
 brw_nir_lower_inputs(nir_shader *nir,
  const struct brw_device_info *devinfo,
@@ -223,6 +286,31 @@ brw_nir_lower_inputs(nir_shader *nir,
   }
   break;
}
+   case MESA_SHADER_TESS_EVAL: {
+  struct remap_patch_urb_offsets_state state;
+  brw_compute_tess_vue_map(&state.vue_map,
+   nir->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID,
+   nir->info.patch_inputs_read);
+
+  foreach_list_typed(nir_variable, var, node, &nir->inputs) {
+ var->data.driver_location = var->data.location;
+  }
+
+  nir_lower_io(nir, nir_var_shader_in, type_size_vec4);
+
+  /* This pass needs actual constants */
+  nir_opt_constant_folding(nir);
+
+  nir_foreach_overload(nir, overload) {
+ if (overload->impl) {
+nir_builder_init(&state.b, overload->impl);
+nir_foreach_block(overload->impl, add_const_offset_to_base, &state.b);
+nir_builder_init(&state.b, overload->impl);
+nir_foreach_block(overload->impl, remap_patch_urb_offsets, &state);
+ }
+  }
+  break;
+   }
case MESA_SHADER_FRAGMENT:
   assert(is_scalar);
   nir_assign_var_locations(&nir->inputs, &nir->num_inputs,
@@ -238,7 +326,9 @@ brw_nir_lower_inputs(nir_shader *nir,
 }
 
 static void
-brw_nir_lower_outputs(nir_shader *nir, bool is_scalar)
+brw_nir_lower_outputs(nir_shader *nir,
+  const struct brw_device_info *devinfo,
+  bool is_scalar)
 {
switch (nir->stage) {
case