Module: Mesa
Branch: main
Commit: c68f9ed02084bc10a8f5a39e18975450c225a8b0
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=c68f9ed02084bc10a8f5a39e18975450c225a8b0

Author: Rhys Perry <[email protected]>
Date:   Thu Oct 20 13:21:54 2022 +0100

radv/llvm: use the ring_offsets shader arg

Besides being nicer, this also fixes load_sample_positions_amd with LLVM.

Signed-off-by: Rhys Perry <[email protected]>
Reviewed-by: Samuel Pitoiset <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19202>

---

 src/amd/vulkan/radv_nir_lower_abi.c |  29 +-------
 src/amd/vulkan/radv_nir_to_llvm.c   | 131 ++----------------------------------
 src/amd/vulkan/radv_pipeline.c      |   4 +-
 src/amd/vulkan/radv_shader.h        |   3 +-
 src/amd/vulkan/radv_shader_args.c   |   4 +-
 5 files changed, 8 insertions(+), 163 deletions(-)

diff --git a/src/amd/vulkan/radv_nir_lower_abi.c 
b/src/amd/vulkan/radv_nir_lower_abi.c
index 5ca6a86fc45..ad9eb7281e6 100644
--- a/src/amd/vulkan/radv_nir_lower_abi.c
+++ b/src/amd/vulkan/radv_nir_lower_abi.c
@@ -34,7 +34,6 @@ typedef struct {
    const struct radv_shader_args *args;
    const struct radv_shader_info *info;
    const struct radv_pipeline_key *pl_key;
-   bool use_llvm;
    uint32_t address32_hi;
    nir_ssa_def *gsvs_ring[4];
 } lower_abi_state;
@@ -128,22 +127,12 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void 
*state)
 
    switch (intrin->intrinsic) {
    case nir_intrinsic_load_ring_tess_factors_amd:
-      if (s->use_llvm) {
-         progress = false;
-         break;
-      }
-
       replacement = load_ring(b, RING_HS_TESS_FACTOR, s);
       break;
    case nir_intrinsic_load_ring_tess_factors_offset_amd:
       replacement = ac_nir_load_arg(b, &s->args->ac, 
s->args->ac.tcs_factor_offset);
       break;
    case nir_intrinsic_load_ring_tess_offchip_amd:
-      if (s->use_llvm) {
-         progress = false;
-         break;
-      }
-
       replacement = load_ring(b, RING_HS_TESS_OFFCHIP, s);
       break;
    case nir_intrinsic_load_ring_tess_offchip_offset_amd:
@@ -162,19 +151,9 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void 
*state)
       }
       break;
    case nir_intrinsic_load_ring_esgs_amd:
-      if (s->use_llvm) {
-         progress = false;
-         break;
-      }
-
       replacement = load_ring(b, stage == MESA_SHADER_GEOMETRY ? RING_ESGS_GS 
: RING_ESGS_VS, s);
       break;
    case nir_intrinsic_load_ring_gsvs_amd:
-      if (s->use_llvm) {
-         progress = false;
-         break;
-      }
-
       if (stage == MESA_SHADER_VERTEX)
          replacement = load_ring(b, RING_GSVS_VS, s);
       else
@@ -188,11 +167,6 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void 
*state)
       break;
 
    case nir_intrinsic_load_ring_attr_amd:
-      if (s->use_llvm) {
-         progress = false;
-         break;
-      }
-
       replacement = load_ring(b, RING_PS_ATTR, s);
 
       nir_ssa_def *dword1 = nir_channel(b, replacement, 1);
@@ -550,14 +524,13 @@ load_gsvs_ring(nir_builder *b, lower_abi_state *s, 
unsigned stream_id)
 void
 radv_nir_lower_abi(nir_shader *shader, enum amd_gfx_level gfx_level,
                    const struct radv_shader_info *info, const struct 
radv_shader_args *args,
-                   const struct radv_pipeline_key *pl_key, bool use_llvm, 
uint32_t address32_hi)
+                   const struct radv_pipeline_key *pl_key, uint32_t 
address32_hi)
 {
    lower_abi_state state = {
       .gfx_level = gfx_level,
       .info = info,
       .args = args,
       .pl_key = pl_key,
-      .use_llvm = use_llvm,
       .address32_hi = address32_hi,
    };
 
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c 
b/src/amd/vulkan/radv_nir_to_llvm.c
index 47cbd220cae..c017bc87098 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -56,18 +56,10 @@ struct radv_shader_context {
 
    LLVMValueRef descriptor_sets[MAX_SETS];
 
-   LLVMValueRef ring_offsets;
-
    LLVMValueRef vs_rel_patch_id;
 
    LLVMValueRef gs_wave_id;
 
-   LLVMValueRef esgs_ring;
-   LLVMValueRef gsvs_ring[4];
-   LLVMValueRef hs_ring_tess_offchip;
-   LLVMValueRef hs_ring_tess_factor;
-   LLVMValueRef attr_ring;
-
    uint64_t output_mask;
 };
 
@@ -167,11 +159,6 @@ create_function(struct radv_shader_context *ctx, 
gl_shader_stage stage, bool has
                            
get_llvm_calling_convention(ctx->main_function.value, stage),
                            ctx->max_workgroup_size, ctx->options);
 
-   ctx->ring_offsets = ac_build_intrinsic(&ctx->ac, 
"llvm.amdgcn.implicit.buffer.ptr",
-                                          LLVMPointerType(ctx->ac.i8, 
AC_ADDR_SPACE_CONST), NULL, 0, 0);
-   ctx->ring_offsets = LLVMBuildBitCast(ctx->ac.builder, ctx->ring_offsets,
-                                        
ac_array_in_const_addr_space(ctx->ac.v4i32), "");
-
    load_descriptor_sets(ctx);
 
    if (stage == MESA_SHADER_TESS_CTRL ||
@@ -801,99 +788,6 @@ ac_llvm_finalize_module(struct radv_shader_context *ctx, 
LLVMPassManagerRef pass
    ac_llvm_context_dispose(&ctx->ac);
 }
 
-static void
-ac_setup_rings(struct radv_shader_context *ctx)
-{
-   struct ac_llvm_pointer ring_offsets = { .t = ctx->ac.v4i32, .v = 
ctx->ring_offsets };
-
-   if (ctx->options->gfx_level <= GFX8 &&
-       (ctx->stage == MESA_SHADER_GEOMETRY ||
-        (ctx->stage == MESA_SHADER_VERTEX && ctx->shader_info->vs.as_es) ||
-        (ctx->stage == MESA_SHADER_TESS_EVAL && ctx->shader_info->tes.as_es))) 
{
-      unsigned ring = ctx->stage == MESA_SHADER_GEOMETRY ? RING_ESGS_GS : 
RING_ESGS_VS;
-      LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, ring, false);
-
-      ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac, ring_offsets, offset);
-   }
-
-   if (ctx->args->is_gs_copy_shader) {
-      ctx->gsvs_ring[0] = ac_build_load_to_sgpr(&ctx->ac, ring_offsets, 
LLVMConstInt(ctx->ac.i32, RING_GSVS_VS, false));
-   }
-
-   if (ctx->stage == MESA_SHADER_GEOMETRY) {
-      /* The conceptual layout of the GSVS ring is
-       *   v0c0 .. vLv0 v0c1 .. vLc1 ..
-       * but the real memory layout is swizzled across
-       * threads:
-       *   t0v0c0 .. t15v0c0 t0v1c0 .. t15v1c0 ... t15vLcL
-       *   t16v0c0 ..
-       * Override the buffer descriptor accordingly.
-       */
-      LLVMTypeRef v2i64 = LLVMVectorType(ctx->ac.i64, 2);
-      uint64_t stream_offset = 0;
-      unsigned num_records = ctx->ac.wave_size;
-      LLVMValueRef base_ring;
-
-      base_ring = ac_build_load_to_sgpr(&ctx->ac, ring_offsets,
-                                        LLVMConstInt(ctx->ac.i32, 
RING_GSVS_GS, false));
-
-      for (unsigned stream = 0; stream < 4; stream++) {
-         unsigned num_components, stride;
-         LLVMValueRef ring, tmp;
-
-         num_components = 
ctx->shader_info->gs.num_stream_output_components[stream];
-
-         if (!num_components)
-            continue;
-
-         stride = 4 * num_components * ctx->shader->info.gs.vertices_out;
-
-         /* Limit on the stride field for <= GFX7. */
-         assert(stride < (1 << 14));
-
-         ring = LLVMBuildBitCast(ctx->ac.builder, base_ring, v2i64, "");
-         tmp = LLVMBuildExtractElement(ctx->ac.builder, ring, ctx->ac.i32_0, 
"");
-         tmp = LLVMBuildAdd(ctx->ac.builder, tmp, LLVMConstInt(ctx->ac.i64, 
stream_offset, 0), "");
-         ring = LLVMBuildInsertElement(ctx->ac.builder, ring, tmp, 
ctx->ac.i32_0, "");
-
-         stream_offset += stride * ctx->ac.wave_size;
-
-         ring = LLVMBuildBitCast(ctx->ac.builder, ring, ctx->ac.v4i32, "");
-
-         tmp = LLVMBuildExtractElement(ctx->ac.builder, ring, ctx->ac.i32_1, 
"");
-         tmp = LLVMBuildOr(ctx->ac.builder, tmp,
-                           LLVMConstInt(ctx->ac.i32, S_008F04_STRIDE(stride), 
false), "");
-         ring = LLVMBuildInsertElement(ctx->ac.builder, ring, tmp, 
ctx->ac.i32_1, "");
-
-         ring = LLVMBuildInsertElement(ctx->ac.builder, ring,
-                                       LLVMConstInt(ctx->ac.i32, num_records, 
false),
-                                       LLVMConstInt(ctx->ac.i32, 2, false), 
"");
-
-         ctx->gsvs_ring[stream] = ring;
-      }
-   }
-
-   if (ctx->stage == MESA_SHADER_TESS_CTRL || ctx->stage == 
MESA_SHADER_TESS_EVAL) {
-      ctx->hs_ring_tess_offchip = ac_build_load_to_sgpr(
-         &ctx->ac, ring_offsets, LLVMConstInt(ctx->ac.i32, 
RING_HS_TESS_OFFCHIP, false));
-      ctx->hs_ring_tess_factor = ac_build_load_to_sgpr(
-         &ctx->ac, ring_offsets, LLVMConstInt(ctx->ac.i32, 
RING_HS_TESS_FACTOR, false));
-   }
-
-   if (ctx->options->gfx_level >= GFX11 &&
-       ((ctx->stage == MESA_SHADER_VERTEX && !ctx->shader_info->vs.as_es && 
!ctx->shader_info->vs.as_ls) ||
-        (ctx->stage == MESA_SHADER_TESS_EVAL && !ctx->shader_info->tes.as_es) 
||
-        (ctx->stage == MESA_SHADER_GEOMETRY))) {
-      ctx->attr_ring = ac_build_load_to_sgpr(&ctx->ac, ring_offsets,
-                                             LLVMConstInt(ctx->ac.i32, 
RING_PS_ATTR, false));
-
-      LLVMValueRef tmp = LLVMBuildExtractElement(ctx->ac.builder, 
ctx->attr_ring, ctx->ac.i32_1, "");
-      uint32_t stride = S_008F04_STRIDE(16 * 
ctx->shader_info->outinfo.param_exports);
-      tmp = LLVMBuildOr(ctx->ac.builder, tmp, LLVMConstInt(ctx->ac.i32, 
stride, false), "");
-      ctx->attr_ring = LLVMBuildInsertElement(ctx->ac.builder, ctx->attr_ring, 
tmp, ctx->ac.i32_1, "");
-   }
-}
-
 /* Fixup the HW not emitting the TCS regs if there are no HS threads. */
 static void
 ac_nir_fixup_ls_hs_input_vgprs(struct radv_shader_context *ctx)
@@ -931,35 +825,20 @@ prepare_gs_input_vgprs(struct radv_shader_context *ctx, 
bool merged)
 static void
 declare_esgs_ring(struct radv_shader_context *ctx)
 {
-   if (ctx->esgs_ring)
-      return;
-
    assert(!LLVMGetNamedGlobal(ctx->ac.module, "esgs_ring"));
 
-   ctx->esgs_ring = LLVMAddGlobalInAddressSpace(ctx->ac.module, 
LLVMArrayType(ctx->ac.i32, 0),
-                                                "esgs_ring", 
AC_ADDR_SPACE_LDS);
-   LLVMSetLinkage(ctx->esgs_ring, LLVMExternalLinkage);
-   LLVMSetAlignment(ctx->esgs_ring, 64 * 1024);
+   LLVMValueRef esgs_ring = LLVMAddGlobalInAddressSpace(ctx->ac.module, 
LLVMArrayType(ctx->ac.i32, 0),
+                                                        "esgs_ring", 
AC_ADDR_SPACE_LDS);
+   LLVMSetLinkage(esgs_ring, LLVMExternalLinkage);
+   LLVMSetAlignment(esgs_ring, 64 * 1024);
 }
 
 static LLVMValueRef radv_intrinsic_load(struct ac_shader_abi *abi, 
nir_intrinsic_instr *intrin)
 {
-   struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
-
    switch (intrin->intrinsic) {
    case nir_intrinsic_load_base_vertex:
    case nir_intrinsic_load_first_vertex:
       return radv_load_base_vertex(abi, intrin->intrinsic == 
nir_intrinsic_load_base_vertex);
-   case nir_intrinsic_load_ring_tess_factors_amd:
-      return ctx->hs_ring_tess_factor;
-   case nir_intrinsic_load_ring_tess_offchip_amd:
-      return ctx->hs_ring_tess_offchip;
-   case nir_intrinsic_load_ring_esgs_amd:
-      return ctx->esgs_ring;
-   case nir_intrinsic_load_ring_attr_amd:
-      return ctx->attr_ring;
-   case nir_intrinsic_load_ring_gsvs_amd:
-      return ctx->gsvs_ring[nir_intrinsic_stream_id(intrin)];
    default:
       return NULL;
    }
@@ -1107,8 +986,6 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
       nir_foreach_shader_out_variable(variable, shaders[shader_idx]) 
scan_shader_output_decl(
          &ctx, variable, shaders[shader_idx], shaders[shader_idx]->info.stage);
 
-      ac_setup_rings(&ctx);
-
       bool check_merged_wave_info = shader_count >= 2 && !(is_ngg && 
shader_idx == 1);
       LLVMBasicBlockRef merge_block = NULL;
 
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 0083ba337dd..fd452f56843 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -3069,8 +3069,7 @@ radv_pipeline_create_gs_copy_shader(struct radv_pipeline 
*pipeline,
    info.inline_push_constant_mask = gs_copy_args.ac.inline_push_const_mask;
 
    NIR_PASS_V(nir, radv_nir_lower_abi, 
device->physical_device->rad_info.gfx_level, &info,
-              &gs_copy_args, pipeline_key, radv_use_llvm_for_stage(device, 
MESA_SHADER_VERTEX),
-              device->physical_device->rad_info.address32_hi);
+              &gs_copy_args, pipeline_key, 
device->physical_device->rad_info.address32_hi);
 
    return radv_create_gs_copy_shader(device, nir, &info, &gs_copy_args, 
gs_copy_binary,
                                      keep_executable_info, keep_statistic_info,
@@ -3332,7 +3331,6 @@ radv_postprocess_nir(struct radv_pipeline *pipeline,
 
    NIR_PASS(_, stage->nir, ac_nir_lower_global_access);
    NIR_PASS_V(stage->nir, radv_nir_lower_abi, gfx_level, &stage->info, 
&stage->args, pipeline_key,
-              radv_use_llvm_for_stage(device, stage->stage),
               device->physical_device->rad_info.address32_hi);
    radv_optimize_nir_algebraic(
       stage->nir, io_to_mem || lowered_ngg || stage->stage == 
MESA_SHADER_COMPUTE ||
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index 3069b69159f..d517143d0e5 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -550,8 +550,7 @@ nir_shader *radv_shader_spirv_to_nir(struct radv_device 
*device,
 
 void radv_nir_lower_abi(nir_shader *shader, enum amd_gfx_level gfx_level,
                         const struct radv_shader_info *info, const struct 
radv_shader_args *args,
-                        const struct radv_pipeline_key *pl_key, bool use_llvm,
-                        uint32_t address32_hi);
+                        const struct radv_pipeline_key *pl_key, uint32_t 
address32_hi);
 
 void radv_init_shader_arenas(struct radv_device *device);
 void radv_destroy_shader_arenas(struct radv_device *device);
diff --git a/src/amd/vulkan/radv_shader_args.c 
b/src/amd/vulkan/radv_shader_args.c
index 74232013d59..259a3da211a 100644
--- a/src/amd/vulkan/radv_shader_args.c
+++ b/src/amd/vulkan/radv_shader_args.c
@@ -583,9 +583,7 @@ radv_declare_shader_args(enum amd_gfx_level gfx_level, 
const struct radv_pipelin
    allocate_user_sgprs(gfx_level, info, args, stage, has_previous_stage, 
previous_stage,
                        needs_view_index, has_ngg_query, has_ngg_provoking_vtx, 
key, &user_sgpr_info);
 
-   if (args->explicit_scratch_args) {
-      ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, 
&args->ac.ring_offsets);
-   }
+   ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, 
&args->ac.ring_offsets);
    if (stage == MESA_SHADER_TASK) {
       ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, 
&args->task_ring_offsets);
    }

Reply via email to