Re: [Mesa-dev] [PATCH 17/20] ac: add si_nir_load_input_gs() to the abi

Timothy Arceri Wed, 15 Nov 2017 03:23:55 -0800

On 15/11/17 22:17, Nicolai Hähnle wrote:

On 15.11.2017 12:09, Timothy Arceri wrote:



On 15/11/17 21:56, Nicolai Hähnle wrote:

On 10.11.2017 04:13, Timothy Arceri wrote:

---

src/amd/common/ac_nir_to_llvm.c | 24++++++++++++---------

  src/amd/common/ac_shader_abi.h                    |  7 ++++++
  src/gallium/drivers/radeonsi/si_shader.c          |  1 +
  src/gallium/drivers/radeonsi/si_shader_internal.h |  5 +++++

src/gallium/drivers/radeonsi/si_shader_nir.c | 26+++++++++++++++++++++++

  5 files changed, 53 insertions(+), 10 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.cb/src/amd/common/ac_nir_to_llvm.c

index 158e954fa8..483dd52b36 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -2854,32 +2854,31 @@ load_tes_input(struct nir_to_llvm_context *ctx,
      buf_addr = LLVMBuildAdd(ctx->builder, buf_addr, comp_offset, "");

result = ac_build_buffer_load(&ctx->ac,ctx->hs_ring_tess_offchip, instr->num_components, NULL, buf_addr, ctx->oc_lds, is_compact ? (4 *const_index) : 0, 1, 0, true, false);

      result = trim_vector(&ctx->ac, result, instr->num_components);

result = LLVMBuildBitCast(ctx->builder, result,get_def_type(ctx->nir, &instr->dest.ssa), "");

      return result;
  }
  static LLVMValueRef
-load_gs_input(struct nir_to_llvm_context *ctx,
-          nir_intrinsic_instr *instr)
+load_gs_input(struct ac_shader_abi *abi,
+          nir_intrinsic_instr *instr,
+          unsigned vertex_index,
+          unsigned const_index)
  {
-    LLVMValueRef indir_index, vtx_offset;
-    unsigned const_index;

+ struct nir_to_llvm_context *ctx =nir_to_llvm_context_from_abi(abi);

+    LLVMValueRef vtx_offset;
      LLVMValueRef args[9];
      unsigned param, vtx_offset_param;
      LLVMValueRef value[4], result;
-    unsigned vertex_index;
-    get_deref_offset(ctx->nir, instr->variables[0],
-             false, &vertex_index, NULL,
-             &const_index, &indir_index);
+
      vtx_offset_param = vertex_index;
      assert(vtx_offset_param < 6);

vtx_offset = LLVMBuildMul(ctx->builder,ctx->gs_vtx_offset[vtx_offset_param],

                    LLVMConstInt(ctx->ac.i32, 4, false), "");

param =shader_io_get_unique_index(instr->variables[0]->var->data.location);

      unsigned comp = instr->variables[0]->var->data.location_frac;
      for (unsigned i = comp; i < instr->num_components + comp; i++) {
          if (ctx->ac.chip_class >= GFX9) {

@@ -2966,21 +2965,26 @@ static LLVMValueRef visit_load_var(structac_nir_context *ctx,

      if (instr->dest.ssa.bit_size == 64)
          ve *= 2;
      switch (instr->variables[0]->var->data.mode) {
      case nir_var_shader_in:
          if (ctx->stage == MESA_SHADER_TESS_CTRL)
              return load_tcs_input(ctx->nctx, instr);
          if (ctx->stage == MESA_SHADER_TESS_EVAL)
              return load_tes_input(ctx->nctx, instr);
          if (ctx->stage == MESA_SHADER_GEOMETRY) {
-            return load_gs_input(ctx->nctx, instr);
+                LLVMValueRef indir_index;
+                unsigned const_index, vertex_index;
+                get_deref_offset(ctx, instr->variables[0],
+                         false, &vertex_index, NULL,
+                         &const_index, &indir_index);


The indentation looks wrong here.

+ return ctx->abi->load_inputs(ctx->abi, instr,vertex_index, const_index);

          }
          for (unsigned chan = comp; chan < ve + comp; chan++) {
              if (indir_index) {
                  unsigned count = glsl_count_attribute_slots(
                          instr->variables[0]->var->type,
                          ctx->stage == MESA_SHADER_VERTEX);
                  count -= chan / 4;

LLVMValueRef tmp_vec =ac_build_gather_values_extended( &ctx->ac, ctx->abi->inputs + idx + chan,count,@@ -6489,22 +6493,22 @@ LLVMModuleRefac_translate_nir_to_llvm(LLVMTargetMachineRef tm,

      for(int i = 0; i < shader_count; ++i) {
          ctx.stage = shaders[i]->info.stage;
          ctx.output_mask = 0;
          ctx.tess_outputs_written = 0;

ctx.num_output_clips =shaders[i]->info.clip_distance_array_size; ctx.num_output_culls =shaders[i]->info.cull_distance_array_size;

          if (shaders[i]->info.stage == MESA_SHADER_GEOMETRY) {

ctx.gs_next_vertex = ac_build_alloca(&ctx.ac,ctx.ac.i32, "gs_next_vertex");

ctx.gs_max_out_vertices =shaders[i]->info.gs.vertices_out;

+            ctx.abi.load_inputs = load_gs_input;
          } else if (shaders[i]->info.stage == MESA_SHADER_TESS_EVAL) {

ctx.tes_primitive_mode =shaders[i]->info.tess.primitive_mode;

          } else if (shaders[i]->info.stage == MESA_SHADER_VERTEX) {
              if (shader_info->info.vs.needs_instance_id) {
                  ctx.shader_info->vs.vgpr_comp_cnt =
                      MAX2(3, ctx.shader_info->vs.vgpr_comp_cnt);
              }
          } else if (shaders[i]->info.stage == MESA_SHADER_FRAGMENT) {

shader_info->fs.can_discard =shaders[i]->info.fs.uses_discard;

diff --git a/src/amd/common/ac_shader_abi.hb/src/amd/common/ac_shader_abi.h

index 27586d0212..6ba1a51e07 100644
--- a/src/amd/common/ac_shader_abi.h
+++ b/src/amd/common/ac_shader_abi.h
@@ -19,20 +19,22 @@

* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,TORT OR * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THESOFTWARE OR THE

   * USE OR OTHER DEALINGS IN THE SOFTWARE.
   */
  #ifndef AC_SHADER_ABI_H
  #define AC_SHADER_ABI_H
  #include <llvm-c/Core.h>
+#include "nir.h" >
+
  enum ac_descriptor_type {
      AC_DESC_IMAGE,
      AC_DESC_FMASK,
      AC_DESC_SAMPLER,
      AC_DESC_BUFFER,
  };

/* Document the shader ABI during compilation. This is what allowsradeonsi and

   * radv to share a compiler backend.
   */
@@ -55,20 +57,25 @@ struct ac_shader_abi {
      LLVMValueRef *inputs;
      void (*emit_outputs)(struct ac_shader_abi *abi,
                   unsigned max_outputs,
                   LLVMValueRef *addrs);
      void (*emit_vertex)(struct ac_shader_abi *abi,
                  unsigned stream,
                  LLVMValueRef *addrs);
+    LLVMValueRef (*load_inputs)(struct ac_shader_abi *abi,
+                    nir_intrinsic_instr *instr,
+                    unsigned vertex_index,
+                    unsigned const_index);
+

LLVMValueRef (*load_ubo)(struct ac_shader_abi *abi,LLVMValueRef index);

      /**
       * Load the descriptor for the given buffer.
       *

* \param buffer the buffer as presented in NIR: this is thedescriptor * in Vulkan, and the buffer index inOpenGL/Gallium

       * \param write whether buffer contents will be written
       */
      LLVMValueRef (*load_ssbo)(struct ac_shader_abi *abi,

diff --git a/src/gallium/drivers/radeonsi/si_shader.cb/src/gallium/drivers/radeonsi/si_shader.c

index 06e3d0f9f1..746816d6a3 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c

@@ -5814,20 +5814,21 @@ static bool si_compile_tgsi_main(structsi_shader_context *ctx,

          if (shader->key.as_es) {
              ctx->abi.emit_outputs = si_llvm_emit_es_epilogue;
              bld_base->emit_epilogue = si_tgsi_emit_es_epilogue;
          } else {
              ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue;
              bld_base->emit_epilogue = si_tgsi_emit_epilogue;
          }
          break;
      case PIPE_SHADER_GEOMETRY:
          bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_gs;
+        ctx->abi.load_inputs = si_nir_load_input_gs;
          ctx->abi.emit_vertex = si_llvm_emit_vertex;
          ctx->abi.emit_outputs = si_llvm_emit_gs_epilogue;
          bld_base->emit_epilogue = si_tgsi_emit_gs_epilogue;
          break;
      case PIPE_SHADER_FRAGMENT:
          ctx->load_input = declare_input_fs;
          ctx->abi.emit_outputs = si_llvm_return_fs_outputs;
          bld_base->emit_epilogue = si_tgsi_emit_epilogue;
          break;
      case PIPE_SHADER_COMPUTE:

diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.hb/src/gallium/drivers/radeonsi/si_shader_internal.h

index 58413e9947..afb723d2ef 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -332,11 +332,16 @@ void si_llvm_load_input_vs(
      struct si_shader_context *ctx,
      unsigned input_index,
      LLVMValueRef out[4]);
  void si_llvm_load_input_fs(
      struct si_shader_context *ctx,
      unsigned input_index,
      LLVMValueRef out[4]);

bool si_nir_build_llvm(struct si_shader_context *ctx, structnir_shader *nir);

+LLVMValueRef si_nir_load_input_gs(struct ac_shader_abi *abi,
+                  nir_intrinsic_instr *instr,
+                  unsigned vertex_index,
+                  unsigned const_index);
+
  #endif

diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.cb/src/gallium/drivers/radeonsi/si_shader_nir.c

index fca16f46cf..5b68ff2a07 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c

@@ -480,20 +480,46 @@ static void declare_nir_input_fs(structsi_shader_context *ctx,

          out[2] = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Z_FLOAT);
          out[3] = ac_build_fdiv(&ctx->ac, ctx->ac.f32_1,
                  LLVMGetParam(ctx->main_fn, SI_PARAM_POS_W_FLOAT));
          return;
      }
      si_llvm_load_input_fs(ctx, *fs_attr_idx, out);
      (*fs_attr_idx)++;
  }
+LLVMValueRef si_nir_load_input_gs(struct ac_shader_abi *abi,
+                  nir_intrinsic_instr *instr,
+                  unsigned vertex_index,
+                  unsigned const_index)
+{
+    struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+
+    nir_shader *nir = ctx->shader->selector->nir;
+    unsigned input_index = 0;
+    nir_foreach_variable(var, &nir->inputs) {

+ if (instr->variables[0]->var->data.location ==var->data.location)

+            break;
+        input_index++;
+    }

This whole approach here seems wrong to me. The order in which input variables are declared shouldn't matter.

The TGSI code uses si_shader_io_get_unique_index, and I think the NIR path should do the same (as does the NIR path for radv, by the way...).

This code does use si_shader_io_get_unique_index(); the input_index is used as a lookup, e.g. info->input_semantic_name[input_index], so that we can get the params to pass into si_shader_io_get_unique_index().

TGSI stores this input_index for later use; it's a TGSIism that we need to work around.


Could we use var->data.driver_location like for vertex and fragment I/O?


Hmm...maybe.


Then the interface could be reduced to

load_input(abi, vertex_index, driver_location, const_index, component);

(not sure if const_index is still needed, actually)

It will also require some shuffling on the radv side to get to this, but it might be doable. I'll give it a try tomorrow. Thanks!

Cheers,
Nicolai
That way, the dependency of ac_shader_abi on NIR can also be removed, which would be preferable.
Cheers,
Nicolai
+
+    LLVMValueRef value[4];
+    unsigned comp = instr->variables[0]->var->data.location_frac;
+    for (unsigned i = comp; i < instr->num_components + comp; i++) {
+ value[i] = si_llvm_load_input_gs(&ctx->abi, input_index,vertex_index,+ nir2llvmtype(ctx,instr->variables[0]->var->type),
+                         i);
+    }
+
+ return ac_build_varying_gather_values(&ctx->ac, value,instr->num_components, comp);
+}
+
  static LLVMValueRef
  si_nir_load_sampler_desc(struct ac_shader_abi *abi,
                   unsigned descriptor_set, unsigned base_index,
                   unsigned constant_index, LLVMValueRef dynamic_index,
                   enum ac_descriptor_type desc_type, bool image,
               bool write)
  {
      struct si_shader_context *ctx = si_shader_context_from_abi(abi);
      LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef list = LLVMGetParam(ctx->main_fn,ctx->param_samplers_and_images);

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 17/20] ac: add si_nir_load_input_gs() to the abi

Reply via email to