---
src/amd/common/ac_nir_to_llvm.c | 24
++++++++++++---------
src/amd/common/ac_shader_abi.h | 7 ++++++
src/gallium/drivers/radeonsi/si_shader.c | 1 +
src/gallium/drivers/radeonsi/si_shader_internal.h | 5 +++++
src/gallium/drivers/radeonsi/si_shader_nir.c | 26
+++++++++++++++++++++++
5 files changed, 53 insertions(+), 10 deletions(-)
diff --git a/src/amd/common/ac_nir_to_llvm.c
b/src/amd/common/ac_nir_to_llvm.c
index 158e954fa8..483dd52b36 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -2854,32 +2854,31 @@ load_tes_input(struct nir_to_llvm_context *ctx,
buf_addr = LLVMBuildAdd(ctx->builder, buf_addr, comp_offset, "");
result = ac_build_buffer_load(&ctx->ac,
ctx->hs_ring_tess_offchip, instr->num_components, NULL,
buf_addr, ctx->oc_lds, is_compact ? (4 *
const_index) : 0, 1, 0, true, false);
result = trim_vector(&ctx->ac, result, instr->num_components);
result = LLVMBuildBitCast(ctx->builder, result,
get_def_type(ctx->nir, &instr->dest.ssa), "");
return result;
}
static LLVMValueRef
-load_gs_input(struct nir_to_llvm_context *ctx,
- nir_intrinsic_instr *instr)
+load_gs_input(struct ac_shader_abi *abi,
+ nir_intrinsic_instr *instr,
+ unsigned vertex_index,
+ unsigned const_index)
{
- LLVMValueRef indir_index, vtx_offset;
- unsigned const_index;
+ struct nir_to_llvm_context *ctx =
nir_to_llvm_context_from_abi(abi);
+ LLVMValueRef vtx_offset;
LLVMValueRef args[9];
unsigned param, vtx_offset_param;
LLVMValueRef value[4], result;
- unsigned vertex_index;
- get_deref_offset(ctx->nir, instr->variables[0],
- false, &vertex_index, NULL,
- &const_index, &indir_index);
+
vtx_offset_param = vertex_index;
assert(vtx_offset_param < 6);
vtx_offset = LLVMBuildMul(ctx->builder,
ctx->gs_vtx_offset[vtx_offset_param],
LLVMConstInt(ctx->ac.i32, 4, false), "");
param =
shader_io_get_unique_index(instr->variables[0]->var->data.location);
unsigned comp = instr->variables[0]->var->data.location_frac;
for (unsigned i = comp; i < instr->num_components + comp; i++) {
if (ctx->ac.chip_class >= GFX9) {
@@ -2966,21 +2965,26 @@ static LLVMValueRef visit_load_var(struct
ac_nir_context *ctx,
if (instr->dest.ssa.bit_size == 64)
ve *= 2;
switch (instr->variables[0]->var->data.mode) {
case nir_var_shader_in:
if (ctx->stage == MESA_SHADER_TESS_CTRL)
return load_tcs_input(ctx->nctx, instr);
if (ctx->stage == MESA_SHADER_TESS_EVAL)
return load_tes_input(ctx->nctx, instr);
if (ctx->stage == MESA_SHADER_GEOMETRY) {
- return load_gs_input(ctx->nctx, instr);
+ LLVMValueRef indir_index;
+ unsigned const_index, vertex_index;
+ get_deref_offset(ctx, instr->variables[0],
+ false, &vertex_index, NULL,
+ &const_index, &indir_index);
+ return ctx->abi->load_inputs(ctx->abi, instr,
vertex_index, const_index);
}
for (unsigned chan = comp; chan < ve + comp; chan++) {
if (indir_index) {
unsigned count = glsl_count_attribute_slots(
instr->variables[0]->var->type,
ctx->stage == MESA_SHADER_VERTEX);
count -= chan / 4;
LLVMValueRef tmp_vec =
ac_build_gather_values_extended(
&ctx->ac, ctx->abi->inputs + idx + chan,
count,
@@ -6489,22 +6493,22 @@ LLVMModuleRef
ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
for(int i = 0; i < shader_count; ++i) {
ctx.stage = shaders[i]->info.stage;
ctx.output_mask = 0;
ctx.tess_outputs_written = 0;
ctx.num_output_clips =
shaders[i]->info.clip_distance_array_size;
ctx.num_output_culls =
shaders[i]->info.cull_distance_array_size;
if (shaders[i]->info.stage == MESA_SHADER_GEOMETRY) {
ctx.gs_next_vertex = ac_build_alloca(&ctx.ac,
ctx.ac.i32, "gs_next_vertex");
-
ctx.gs_max_out_vertices =
shaders[i]->info.gs.vertices_out;
+ ctx.abi.load_inputs = load_gs_input;
} else if (shaders[i]->info.stage == MESA_SHADER_TESS_EVAL) {
ctx.tes_primitive_mode =
shaders[i]->info.tess.primitive_mode;
} else if (shaders[i]->info.stage == MESA_SHADER_VERTEX) {
if (shader_info->info.vs.needs_instance_id) {
ctx.shader_info->vs.vgpr_comp_cnt =
MAX2(3, ctx.shader_info->vs.vgpr_comp_cnt);
}
} else if (shaders[i]->info.stage == MESA_SHADER_FRAGMENT) {
shader_info->fs.can_discard =
shaders[i]->info.fs.uses_discard;
}
diff --git a/src/amd/common/ac_shader_abi.h
b/src/amd/common/ac_shader_abi.h
index 27586d0212..6ba1a51e07 100644
--- a/src/amd/common/ac_shader_abi.h
+++ b/src/amd/common/ac_shader_abi.h
@@ -19,20 +19,22 @@
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef AC_SHADER_ABI_H
#define AC_SHADER_ABI_H
#include <llvm-c/Core.h>
+#include "nir.h" >
+
enum ac_descriptor_type {
AC_DESC_IMAGE,
AC_DESC_FMASK,
AC_DESC_SAMPLER,
AC_DESC_BUFFER,
};
/* Document the shader ABI during compilation. This is what allows
radeonsi and
* radv to share a compiler backend.
*/
@@ -55,20 +57,25 @@ struct ac_shader_abi {
LLVMValueRef *inputs;
void (*emit_outputs)(struct ac_shader_abi *abi,
unsigned max_outputs,
LLVMValueRef *addrs);
void (*emit_vertex)(struct ac_shader_abi *abi,
unsigned stream,
LLVMValueRef *addrs);
+ LLVMValueRef (*load_inputs)(struct ac_shader_abi *abi,
+ nir_intrinsic_instr *instr,
+ unsigned vertex_index,
+ unsigned const_index);
+
LLVMValueRef (*load_ubo)(struct ac_shader_abi *abi,
LLVMValueRef index);
/**
* Load the descriptor for the given buffer.
*
* \param buffer the buffer as presented in NIR: this is the
descriptor
* in Vulkan, and the buffer index in
OpenGL/Gallium
* \param write whether buffer contents will be written
*/
LLVMValueRef (*load_ssbo)(struct ac_shader_abi *abi,
diff --git a/src/gallium/drivers/radeonsi/si_shader.c
b/src/gallium/drivers/radeonsi/si_shader.c
index 06e3d0f9f1..746816d6a3 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5814,20 +5814,21 @@ static bool si_compile_tgsi_main(struct
si_shader_context *ctx,
if (shader->key.as_es) {
ctx->abi.emit_outputs = si_llvm_emit_es_epilogue;
bld_base->emit_epilogue = si_tgsi_emit_es_epilogue;
} else {
ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue;
bld_base->emit_epilogue = si_tgsi_emit_epilogue;
}
break;
case PIPE_SHADER_GEOMETRY:
bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_gs;
+ ctx->abi.load_inputs = si_nir_load_input_gs;
ctx->abi.emit_vertex = si_llvm_emit_vertex;
ctx->abi.emit_outputs = si_llvm_emit_gs_epilogue;
bld_base->emit_epilogue = si_tgsi_emit_gs_epilogue;
break;
case PIPE_SHADER_FRAGMENT:
ctx->load_input = declare_input_fs;
ctx->abi.emit_outputs = si_llvm_return_fs_outputs;
bld_base->emit_epilogue = si_tgsi_emit_epilogue;
break;
case PIPE_SHADER_COMPUTE:
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h
b/src/gallium/drivers/radeonsi/si_shader_internal.h
index 58413e9947..afb723d2ef 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -332,11 +332,16 @@ void si_llvm_load_input_vs(
struct si_shader_context *ctx,
unsigned input_index,
LLVMValueRef out[4]);
void si_llvm_load_input_fs(
struct si_shader_context *ctx,
unsigned input_index,
LLVMValueRef out[4]);
bool si_nir_build_llvm(struct si_shader_context *ctx, struct
nir_shader *nir);
+LLVMValueRef si_nir_load_input_gs(struct ac_shader_abi *abi,
+ nir_intrinsic_instr *instr,
+ unsigned vertex_index,
+ unsigned const_index);
+
#endif
diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c
b/src/gallium/drivers/radeonsi/si_shader_nir.c
index fca16f46cf..5b68ff2a07 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -480,20 +480,46 @@ static void declare_nir_input_fs(struct
si_shader_context *ctx,
out[2] = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Z_FLOAT);
out[3] = ac_build_fdiv(&ctx->ac, ctx->ac.f32_1,
LLVMGetParam(ctx->main_fn, SI_PARAM_POS_W_FLOAT));
return;
}
si_llvm_load_input_fs(ctx, *fs_attr_idx, out);
(*fs_attr_idx)++;
}
+LLVMValueRef si_nir_load_input_gs(struct ac_shader_abi *abi,
+ nir_intrinsic_instr *instr,
+ unsigned vertex_index,
+ unsigned const_index)
+{
+ struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+
+ nir_shader *nir = ctx->shader->selector->nir;
+ unsigned input_index = 0;
+ nir_foreach_variable(var, &nir->inputs) {
+ if (instr->variables[0]->var->data.location ==
var->data.location)
+ break;
+ input_index++;
+ }