If a non-const sample number is given to interpolateAtSample, the
driver now generates an indirect send message carrying the sample ID,
similar to how non-const sampler array indexing works. Previously
non-const values were ignored and a constant value of 0 was used instead.

The generator tries to determine whether the sample ID is dynamically
uniform via nir_src_is_dynamically_uniform. If it is not, the generator
queries the pixel interpolator in a loop, once for each possible sample
number. This is necessary because the indirect send message doesn't seem
to have a way to specify a different value for each fragment.

The range of possible sample numbers is determined using
STATE_NUM_SAMPLES. When linking the shader, a reference to this state
is now added if any dynamically non-uniform calls to
interpolateAtSample are found.

This fixes the following two Piglit tests:

arb_gpu_shader5-interpolateAtSample-nonconst
arb_gpu_shader5-interpolateAtSample-dynamically-nonuniform

v2: Handle dynamically non-uniform sample ids.
---
 src/mesa/drivers/dri/i965/brw_eu.h             |   2 +-
 src/mesa/drivers/dri/i965/brw_eu_emit.c        |  34 ++++---
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp |   5 +-
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp       | 119 +++++++++++++++++++++----
 src/mesa/drivers/dri/i965/brw_program.c        |  54 +++++++++++
 src/mesa/drivers/dri/i965/brw_program.h        |   1 +
 src/mesa/drivers/dri/i965/brw_shader.cpp       |   2 +
 7 files changed, 185 insertions(+), 32 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 761aa0e..0ac1ad9 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -461,7 +461,7 @@ brw_pixel_interpolator_query(struct brw_codegen *p,
                              struct brw_reg mrf,
                              bool noperspective,
                              unsigned mode,
-                             unsigned data,
+                             struct brw_reg data,
                              unsigned msg_length,
                              unsigned response_length);
 
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 4d39762..25524d4 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -3192,26 +3192,38 @@ brw_pixel_interpolator_query(struct brw_codegen *p,
                              struct brw_reg mrf,
                              bool noperspective,
                              unsigned mode,
-                             unsigned data,
+                             struct brw_reg data,
                              unsigned msg_length,
                              unsigned response_length)
 {
    const struct brw_device_info *devinfo = p->devinfo;
-   struct brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
+   struct brw_inst *insn;
+   uint16_t exec_size;
 
-   brw_set_dest(p, insn, dest);
-   brw_set_src0(p, insn, mrf);
-   brw_set_message_descriptor(p, insn, GEN7_SFID_PIXEL_INTERPOLATOR,
-                              msg_length, response_length,
-                              false /* header is never present for PI */,
-                              false);
+   if (data.file == BRW_IMMEDIATE_VALUE) {
+      insn = next_insn(p, BRW_OPCODE_SEND);
+      brw_set_dest(p, insn, dest);
+      brw_set_src0(p, insn, mrf);
+      brw_set_message_descriptor(p, insn, GEN7_SFID_PIXEL_INTERPOLATOR,
+                                 msg_length, response_length,
+                                 false /* header is never present for PI */,
+                                 false);
+      brw_inst_set_pi_message_data(devinfo, insn, data.dw1.ud);
+   } else {
+      insn = brw_send_indirect_message(p,
+                                       GEN7_SFID_PIXEL_INTERPOLATOR,
+                                       dest,
+                                       mrf,
+                                       vec1(data));
+      brw_inst_set_mlen(devinfo, insn, msg_length);
+      brw_inst_set_rlen(devinfo, insn, response_length);
+   }
 
-   brw_inst_set_pi_simd_mode(
-         devinfo, insn, brw_inst_exec_size(devinfo, insn) == BRW_EXECUTE_16);
+   exec_size = brw_inst_exec_size(devinfo, p->current);
+   brw_inst_set_pi_simd_mode(devinfo, insn, exec_size == BRW_EXECUTE_16);
    brw_inst_set_pi_slot_group(devinfo, insn, 0); /* zero unless 32/64px 
dispatch */
    brw_inst_set_pi_nopersp(devinfo, insn, noperspective);
    brw_inst_set_pi_message_type(devinfo, insn, mode);
-   brw_inst_set_pi_message_data(devinfo, insn, data);
 }
 
 void
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index c86ca04..88dbc62 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -1328,15 +1328,14 @@ fs_generator::generate_pixel_interpolator_query(fs_inst *inst,
                                                 struct brw_reg msg_data,
                                                 unsigned msg_type)
 {
-   assert(msg_data.file == BRW_IMMEDIATE_VALUE &&
-          msg_data.type == BRW_REGISTER_TYPE_UD);
+   assert(msg_data.type == BRW_REGISTER_TYPE_UD);
 
    brw_pixel_interpolator_query(p,
          retype(dst, BRW_REGISTER_TYPE_UW),
          src,
          inst->pi_noperspective,
          msg_type,
-         msg_data.dw1.ud,
+         msg_data,
          inst->mlen,
          inst->regs_written);
 }
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index ee964a0..0fdb9ae 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -1167,6 +1167,47 @@ fs_visitor::emit_percomp(const fs_builder &bld, const fs_inst &inst,
    }
 }
 
+/* For most messages, we need one reg of ignored data; the hardware requires
+ * mlen==1 even when there is no payload. in the per-slot offset case, we'll
+ * replace this with the proper source data.
+ */
+static void
+setup_pixel_interpolater_instruction(fs_visitor *v,
+                                     nir_intrinsic_instr *instr,
+                                     fs_inst *inst,
+                                     int mlen = 1)
+{
+   inst->mlen = mlen;
+   /* 2 floats per slot returned */
+   inst->regs_written = 2 * v->dispatch_width / 8;
+   inst->pi_noperspective = instr->variables[0]->var->data.interpolation ==
+      INTERP_QUALIFIER_NOPERSPECTIVE;
+}
+
+static fs_reg
+get_num_samples_reg(fs_visitor *v)
+{
+   struct gl_program_parameter_list *params = v->prog->Parameters;
+   static gl_state_index tokens[STATE_LENGTH] = {
+      STATE_NUM_SAMPLES
+   };
+   GLuint index = _mesa_add_state_reference(params, tokens);
+   unsigned i;
+
+   /* Try to find an existing copy of the uniform */
+   for (i = 0; i < v->uniforms; i++) {
+      if (v->stage_prog_data->param[i] ==
+          &v->prog->Parameters->ParameterValues[index][0])
+         goto found;
+   }
+
+   v->stage_prog_data->param[v->uniforms++] =
+      &v->prog->Parameters->ParameterValues[index][0];
+
+found:
+   return retype(fs_reg(UNIFORM, i), BRW_REGISTER_TYPE_UD);
+}
+
 void
 fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr 
*instr)
 {
@@ -1438,27 +1479,73 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
 
       fs_reg dst_xy = bld.vgrf(BRW_REGISTER_TYPE_F, 2);
 
-      /* For most messages, we need one reg of ignored data; the hardware
-       * requires mlen==1 even when there is no payload. in the per-slot
-       * offset case, we'll replace this with the proper source data.
-       */
       fs_reg src = vgrf(glsl_type::float_type);
-      int mlen = 1;     /* one reg unless overriden */
       fs_inst *inst;
 
       switch (instr->intrinsic) {
       case nir_intrinsic_interp_var_at_centroid:
          inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_CENTROID,
                          dst_xy, src, fs_reg(0u));
+         setup_pixel_interpolater_instruction(this, instr, inst);
          break;
 
       case nir_intrinsic_interp_var_at_sample: {
-         /* XXX: We should probably handle non-constant sample id's */
          nir_const_value *const_sample = nir_src_as_const_value(instr->src[0]);
-         assert(const_sample);
-         unsigned msg_data = const_sample ? const_sample->i[0] << 4 : 0;
-         inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src,
-                         fs_reg(msg_data));
+
+         if (const_sample) {
+            unsigned msg_data = const_sample->i[0] << 4;
+
+            inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src,
+                            fs_reg(msg_data));
+
+            setup_pixel_interpolater_instruction(this, instr, inst);
+         } else {
+            fs_reg sample_src = retype(get_nir_src(instr->src[0]),
+                                       BRW_REGISTER_TYPE_UD);
+            fs_reg sample_id_reg;
+
+            if (nir_src_is_dynamically_uniform(instr->src[0])) {
+               sample_id_reg = vgrf(glsl_type::uint_type);
+               bld.SHL(sample_id_reg, sample_src, fs_reg(4u));
+               sample_id_reg = bld.emit_uniformize(sample_id_reg);
+               inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src,
+                               sample_id_reg);
+               setup_pixel_interpolater_instruction(this, instr, inst);
+            } else {
+               /* Make a loop that sends a message to the pixel interpolator
+                * for each possible sample number so that each individual
+                * message will be dynamically uniform. The number of samples
+                * is determined by accessing the STATE_NUM_SAMPLES state var.
+                */
+               fs_reg i_reg = vgrf(glsl_type::uint_type);
+               fs_reg sample_id_reg = vgrf(glsl_type::uint_type);
+               fs_reg num_samples_reg = get_num_samples_reg(this);
+
+               bld.MOV(i_reg, fs_reg(0u));
+
+               bld.emit(BRW_OPCODE_DO);
+
+               bld.CMP(bld.null_reg_ud(),
+                       sample_src, i_reg,
+                       BRW_CONDITIONAL_EQ);
+               bld.IF(BRW_PREDICATE_NORMAL);
+               bld.SHL(sample_id_reg, i_reg, fs_reg(4u));
+               sample_id_reg = bld.emit_uniformize(sample_id_reg);
+               inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src,
+                               sample_id_reg);
+               setup_pixel_interpolater_instruction(this, instr, inst);
+               bld.emit(BRW_OPCODE_ENDIF);
+
+               bld.ADD(i_reg, i_reg, fs_reg(1u));
+               bld.CMP(bld.null_reg_ud(),
+                       i_reg, num_samples_reg,
+                       BRW_CONDITIONAL_GE);
+               inst = bld.emit(BRW_OPCODE_BREAK);
+               inst->predicate = BRW_PREDICATE_NORMAL;
+               bld.emit(BRW_OPCODE_WHILE);
+            }
+         }
+
          break;
       }
 
@@ -1471,6 +1558,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
 
             inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_xy, 
src,
                             fs_reg(off_x | (off_y << 4)));
+            setup_pixel_interpolater_instruction(this, instr, inst);
          } else {
             src = vgrf(glsl_type::ivec2_type);
             fs_reg offset_src = retype(get_nir_src(instr->src[0]),
@@ -1500,9 +1588,12 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
                            bld.SEL(offset(src, bld, i), itemp, fs_reg(7)));
             }
 
-            mlen = 2 * dispatch_width / 8;
             inst = bld.emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_xy, 
src,
                             fs_reg(0u));
+            setup_pixel_interpolater_instruction(this,
+                                                 instr,
+                                                 inst,
+                                                 2 * dispatch_width / 8);
          }
          break;
       }
@@ -1511,12 +1602,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
          unreachable("Invalid intrinsic");
       }
 
-      inst->mlen = mlen;
-      /* 2 floats per slot returned */
-      inst->regs_written = 2 * dispatch_width / 8;
-      inst->pi_noperspective = instr->variables[0]->var->data.interpolation ==
-                               INTERP_QUALIFIER_NOPERSPECTIVE;
-
       for (unsigned j = 0; j < instr->num_components; j++) {
          fs_reg src = interp_reg(instr->variables[0]->var->data.location, j);
          src.type = dest.type;
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index 467a893..6430c5d 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -146,6 +146,8 @@ brwProgramStringNotify(struct gl_context *ctx,
          prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_FRAGMENT, 
true);
       }
 
+      brw_add_interpolate_at_sample_params(prog);
+
       brw_fs_precompile(ctx, NULL, prog);
       break;
    }
@@ -246,6 +248,58 @@ brw_add_texrect_params(struct gl_program *prog)
    }
 }
 
+static bool
+find_interpolate_at_sample_in_block(nir_block *block,
+                                    void *data)
+{
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_intrinsic)
+         continue;
+
+      nir_intrinsic_instr *intrinsic_instr = nir_instr_as_intrinsic(instr);
+
+      if (intrinsic_instr->intrinsic != nir_intrinsic_interp_var_at_sample)
+         continue;
+
+      /* If the sample number is known to be dynamically uniform then
+       * the generator won't need the num_samples state.
+       */
+      if (nir_src_is_dynamically_uniform(intrinsic_instr->src[0]))
+         continue;
+
+      return false;
+   }
+
+   return true;
+}
+
+void
+brw_add_interpolate_at_sample_params(struct gl_program *prog)
+{
+   static gl_state_index tokens[STATE_LENGTH] = {
+      STATE_NUM_SAMPLES
+   };
+
+   if (!prog->nir)
+      return;
+
+   /* If anything calls interpolateAtSample with a dynamically non-uniform
+    * sample ID then we need STATE_NUM_SAMPLES to be able to iterate over
+    * each possible value.
+    */
+   nir_foreach_overload(prog->nir, overload) {
+      if (overload->impl) {
+         bool found = !nir_foreach_block(overload->impl,
+                                         find_interpolate_at_sample_in_block,
+                                         ralloc_parent(overload->impl));
+         if (found) {
+            _mesa_add_state_reference(prog->Parameters, tokens);
+            break;
+         }
+      }
+   }
+}
+
 /* Per-thread scratch space is a power-of-two multiple of 1KB. */
 int
 brw_get_scratch_size(int size)
diff --git a/src/mesa/drivers/dri/i965/brw_program.h b/src/mesa/drivers/dri/i965/brw_program.h
index eaa7e4e..c41ee96 100644
--- a/src/mesa/drivers/dri/i965/brw_program.h
+++ b/src/mesa/drivers/dri/i965/brw_program.h
@@ -167,6 +167,7 @@ bool brw_debug_recompile_sampler_key(struct brw_context *brw,
                                      const struct brw_sampler_prog_key_data 
*old_key,
                                      const struct brw_sampler_prog_key_data 
*key);
 void brw_add_texrect_params(struct gl_program *prog);
+void brw_add_interpolate_at_sample_params(struct gl_program *prog);
 
 void
 brw_mark_surface_used(struct brw_stage_prog_data *prog_data,
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index bccf8d6..c76f4f7 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -407,6 +407,8 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
                                     is_scalar_shader_stage(brw, stage));
       }
 
+      brw_add_interpolate_at_sample_params(prog);
+
       _mesa_reference_program(ctx, &prog, NULL);
    }
 
-- 
1.9.3

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to