From: Marek Olšák <marek.ol...@amd.com>

---
 src/amd/common/ac_llvm_build.c           | 62 +++++++++----------------
 src/amd/common/ac_llvm_build.h           | 34 ++++----------
 src/amd/common/ac_nir_to_llvm.c          | 16 +++----
 src/gallium/drivers/radeonsi/si_shader.c | 79 ++++++++++++++++----------------
 4 files changed, 77 insertions(+), 114 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 34085bb..cc1eaf1 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -536,87 +536,69 @@ ac_build_indexed_load_const(struct ac_llvm_context *ctx,
        LLVMValueRef result = ac_build_indexed_load(ctx, base_ptr, index, true);
        LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md);
        return result;
 }
 
 /* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4.
  * The type of vdata must be one of i32 (num_channels=1), v2i32 (num_channels=2),
  * or v4i32 (num_channels=3,4).
  */
 void
-ac_build_tbuffer_store(struct ac_llvm_context *ctx,
-                      LLVMValueRef rsrc,
-                      LLVMValueRef vdata,
-                      unsigned num_channels,
-                      LLVMValueRef vaddr,
-                      LLVMValueRef soffset,
-                      unsigned inst_offset,
-                      unsigned dfmt,
-                      unsigned nfmt,
-                      unsigned offen,
-                      unsigned idxen,
-                      unsigned glc,
-                      unsigned slc,
-                      unsigned tfe)
+ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
+                           LLVMValueRef rsrc,
+                           LLVMValueRef vdata,
+                           unsigned num_channels,
+                           LLVMValueRef vaddr,
+                           LLVMValueRef soffset,
+                           unsigned inst_offset,
+                           bool offen,
+                           bool glc,
+                           bool slc)
 {
+       static unsigned dfmt[] = {
+               V_008F0C_BUF_DATA_FORMAT_32,
+               V_008F0C_BUF_DATA_FORMAT_32_32,
+               V_008F0C_BUF_DATA_FORMAT_32_32_32,
+               V_008F0C_BUF_DATA_FORMAT_32_32_32_32
+       };
+       assert(num_channels >= 1 && num_channels <= 4);
+
        LLVMValueRef args[] = {
                rsrc,
                vdata,
                LLVMConstInt(ctx->i32, num_channels, 0),
                vaddr,
                soffset,
                LLVMConstInt(ctx->i32, inst_offset, 0),
-               LLVMConstInt(ctx->i32, dfmt, 0),
-               LLVMConstInt(ctx->i32, nfmt, 0),
+               LLVMConstInt(ctx->i32, dfmt[num_channels - 1], 0),
+               LLVMConstInt(ctx->i32, V_008F0C_BUF_NUM_FORMAT_UINT, 0),
                LLVMConstInt(ctx->i32, offen, 0),
-               LLVMConstInt(ctx->i32, idxen, 0),
+               LLVMConstInt(ctx->i32, 0, 0), /* idxen */
                LLVMConstInt(ctx->i32, glc, 0),
                LLVMConstInt(ctx->i32, slc, 0),
-               LLVMConstInt(ctx->i32, tfe, 0)
+               LLVMConstInt(ctx->i32, 0, 0), /* tfe*/
        };
 
        /* The instruction offset field has 12 bits */
        assert(offen || inst_offset < (1 << 12));
 
        /* The intrinsic is overloaded, we need to add a type suffix for overloading to work. */
        unsigned func = CLAMP(num_channels, 1, 3) - 1;
        const char *types[] = {"i32", "v2i32", "v4i32"};
        char name[256];
        snprintf(name, sizeof(name), "llvm.SI.tbuffer.store.%s", types[func]);
 
        ac_emit_llvm_intrinsic(ctx, name, ctx->voidt,
                               args, ARRAY_SIZE(args),
                               AC_FUNC_ATTR_LEGACY);
 }
 
-void
-ac_build_tbuffer_store_dwords(struct ac_llvm_context *ctx,
-                             LLVMValueRef rsrc,
-                             LLVMValueRef vdata,
-                             unsigned num_channels,
-                             LLVMValueRef vaddr,
-                             LLVMValueRef soffset,
-                             unsigned inst_offset)
-{
-       static unsigned dfmt[] = {
-               V_008F0C_BUF_DATA_FORMAT_32,
-               V_008F0C_BUF_DATA_FORMAT_32_32,
-               V_008F0C_BUF_DATA_FORMAT_32_32_32,
-               V_008F0C_BUF_DATA_FORMAT_32_32_32_32
-       };
-       assert(num_channels >= 1 && num_channels <= 4);
-
-       ac_build_tbuffer_store(ctx, rsrc, vdata, num_channels, vaddr, soffset,
-                              inst_offset, dfmt[num_channels - 1],
-                              V_008F0C_BUF_NUM_FORMAT_UINT, 1, 0, 1, 1, 0);
-}
-
 LLVMValueRef
 ac_build_buffer_load(struct ac_llvm_context *ctx,
                     LLVMValueRef rsrc,
                     int num_channels,
                     LLVMValueRef vindex,
                     LLVMValueRef voffset,
                     LLVMValueRef soffset,
                     unsigned inst_offset,
                     unsigned glc,
                     unsigned slc,
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index e7773d7..65a9a05 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -115,44 +115,30 @@ ac_build_indexed_store(struct ac_llvm_context *ctx,
 LLVMValueRef
 ac_build_indexed_load(struct ac_llvm_context *ctx,
                      LLVMValueRef base_ptr, LLVMValueRef index,
                      bool uniform);
 
 LLVMValueRef
 ac_build_indexed_load_const(struct ac_llvm_context *ctx,
                            LLVMValueRef base_ptr, LLVMValueRef index);
 
 void
-ac_build_tbuffer_store_dwords(struct ac_llvm_context *ctx,
-                             LLVMValueRef rsrc,
-                             LLVMValueRef vdata,
-                             unsigned num_channels,
-                             LLVMValueRef vaddr,
-                             LLVMValueRef soffset,
-                             unsigned inst_offset);
-
-void
-ac_build_tbuffer_store(struct ac_llvm_context *ctx,
-                      LLVMValueRef rsrc,
-                      LLVMValueRef vdata,
-                      unsigned num_channels,
-                      LLVMValueRef vaddr,
-                      LLVMValueRef soffset,
-                      unsigned inst_offset,
-                      unsigned dfmt,
-                      unsigned nfmt,
-                      unsigned offen,
-                      unsigned idxen,
-                      unsigned glc,
-                      unsigned slc,
-                      unsigned tfe);
-
+ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
+                           LLVMValueRef rsrc,
+                           LLVMValueRef vdata,
+                           unsigned num_channels,
+                           LLVMValueRef vaddr,
+                           LLVMValueRef soffset,
+                           unsigned inst_offset,
+                           bool offen,
+                           bool glc,
+                           bool slc);
 LLVMValueRef
 ac_build_buffer_load(struct ac_llvm_context *ctx,
                     LLVMValueRef rsrc,
                     int num_channels,
                     LLVMValueRef vindex,
                     LLVMValueRef voffset,
                     LLVMValueRef soffset,
                     unsigned inst_offset,
                     unsigned glc,
                     unsigned slc,
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 30d48aa..4143b3c 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3131,26 +3131,24 @@ visit_emit_vertex(struct nir_to_llvm_context *ctx,
                }
                for (unsigned j = 0; j < length; j++) {
                        LLVMValueRef out_val = LLVMBuildLoad(ctx->builder,
                                                             out_ptr[j], "");
                        LLVMValueRef voffset = LLVMConstInt(ctx->i32, (slot * 4 
+ j + start) * ctx->gs_max_out_vertices, false);
                        voffset = LLVMBuildAdd(ctx->builder, voffset, 
gs_next_vertex, "");
                        voffset = LLVMBuildMul(ctx->builder, voffset, 
LLVMConstInt(ctx->i32, 4, false), "");
 
                        out_val = LLVMBuildBitCast(ctx->builder, out_val, 
ctx->i32, "");
 
-                       ac_build_tbuffer_store(&ctx->ac, ctx->gsvs_ring,
-                                              out_val, 1,
-                                              voffset, ctx->gs2vs_offset, 0,
-                                              V_008F0C_BUF_DATA_FORMAT_32,
-                                              V_008F0C_BUF_NUM_FORMAT_UINT,
-                                              1, 0, 1, 1, 0);
+                       ac_build_buffer_store_dword(&ctx->ac, ctx->gsvs_ring,
+                                                   out_val, 1,
+                                                   voffset, ctx->gs2vs_offset, 
0,
+                                                   1, 1, 1);
                }
                idx += slot_inc;
        }
 
        gs_next_vertex = LLVMBuildAdd(ctx->builder, gs_next_vertex,
                                      ctx->i32one, "");
        LLVMBuildStore(ctx->builder, gs_next_vertex, ctx->gs_next_vertex);
 
        ac_emit_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (0 << 
8), ctx->gs_wave_id);
 }
@@ -4631,28 +4629,26 @@ handle_es_outputs_post(struct nir_to_llvm_context *ctx)
                }
                param_index = shader_io_get_unique_index(i);
 
                if (param_index > max_output_written)
                        max_output_written = param_index;
 
                for (j = 0; j < length; j++) {
                        LLVMValueRef out_val = LLVMBuildLoad(ctx->builder, 
out_ptr[j], "");
                        out_val = LLVMBuildBitCast(ctx->builder, out_val, 
ctx->i32, "");
 
-                       ac_build_tbuffer_store(&ctx->ac,
+                       ac_build_buffer_store_dword(&ctx->ac,
                                               ctx->esgs_ring,
                                               out_val, 1,
                                               LLVMGetUndef(ctx->i32), 
ctx->es2gs_offset,
                                               (4 * param_index + j + start) * 
4,
-                                              V_008F0C_BUF_DATA_FORMAT_32,
-                                              V_008F0C_BUF_NUM_FORMAT_UINT,
-                                              0, 0, 1, 1, 0);
+                                              0, 1, 1);
                }
        }
        ctx->shader_info->vs.esgs_itemsize = (max_output_written + 1) * 16;
 }
 
 static void
 si_export_mrt_color(struct nir_to_llvm_context *ctx,
                    LLVMValueRef *color, unsigned param, bool is_last)
 {
        LLVMValueRef args[9];
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 699fefd..daaf9f1 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1040,31 +1040,31 @@ static void store_output_tcs(struct lp_build_tgsi_context *bld_base,
                        value = ac_emit_clamp(&ctx->ac, value);
 
                /* Skip LDS stores if there is no LDS read of this output. */
                if (!skip_lds_store)
                        lds_store(bld_base, chan_index, dw_addr, value);
 
                value = LLVMBuildBitCast(gallivm->builder, value, ctx->i32, "");
                values[chan_index] = value;
 
                if (inst->Dst[0].Register.WriteMask != 0xF && !is_tess_factor) {
-                       ac_build_tbuffer_store_dwords(&ctx->ac, buffer, value, 
1,
-                                                     buf_addr, base,
-                                                     4 * chan_index);
+                       ac_build_buffer_store_dword(&ctx->ac, buffer, value, 1,
+                                                   buf_addr, base,
+                                                   4 * chan_index, 1, 1, 1);
                }
        }
 
        if (inst->Dst[0].Register.WriteMask == 0xF && !is_tess_factor) {
                LLVMValueRef value = 
lp_build_gather_values(bld_base->base.gallivm,
                                                            values, 4);
-               ac_build_tbuffer_store_dwords(&ctx->ac, buffer, value, 4, 
buf_addr,
-                                             base, 0);
+               ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, 
buf_addr,
+                                           base, 0, 1, 1, 1);
        }
 }
 
 static LLVMValueRef fetch_input_gs(
        struct lp_build_tgsi_context *bld_base,
        const struct tgsi_full_src_register *reg,
        enum tgsi_opcode_type type,
        unsigned swizzle)
 {
        struct lp_build_context *base = &bld_base->base;
@@ -2076,25 +2076,25 @@ static void emit_streamout_output(struct si_shader_context *ctx,
        case 3: /* as v4i32 (aligned to 4) */
        case 4: /* as v4i32 */
                vdata = LLVMGetUndef(LLVMVectorType(ctx->i32, 
util_next_power_of_two(num_comps)));
                for (int j = 0; j < num_comps; j++) {
                        vdata = LLVMBuildInsertElement(builder, vdata, out[j],
                                                       LLVMConstInt(ctx->i32, 
j, 0), "");
                }
                break;
        }
 
-       ac_build_tbuffer_store_dwords(&ctx->ac, so_buffers[buf_idx],
-                                     vdata, num_comps,
-                                     so_write_offsets[buf_idx],
-                                     LLVMConstInt(ctx->i32, 0, 0),
-                                     stream_out->dst_offset * 4);
+       ac_build_buffer_store_dword(&ctx->ac, so_buffers[buf_idx],
+                                   vdata, num_comps,
+                                   so_write_offsets[buf_idx],
+                                   LLVMConstInt(ctx->i32, 0, 0),
+                                   stream_out->dst_offset * 4, 1, 1, 1);
 }
 
 /**
  * Write streamout data to buffers for vertex stream @p stream (different
  * vertex streams can occur for GS copy shaders).
  */
 static void si_llvm_emit_streamout(struct si_shader_context *ctx,
                                   struct si_shader_output_values *outputs,
                                   unsigned noutput, unsigned stream)
 {
@@ -2404,22 +2404,22 @@ static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base)
                                             "");
 
                LLVMValueRef buffer_addr = get_tcs_tes_buffer_address(ctx,
                                              get_rel_patch_id(ctx),
                                              invocation_id,
                                              lp_build_const_int32(gallivm, i));
 
                LLVMValueRef value = lds_load(bld_base, TGSI_TYPE_SIGNED, ~0,
                                              lds_ptr);
 
-               ac_build_tbuffer_store_dwords(&ctx->ac, buffer, value, 4, 
buffer_addr,
-                                             buffer_offset, 0);
+               ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, 
buffer_addr,
+                                           buffer_offset, 0, 1, 1, 1);
        }
 }
 
 static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
                                  LLVMValueRef rel_patch_id,
                                  LLVMValueRef invocation_id,
                                  LLVMValueRef 
tcs_out_current_patch_data_offset)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct gallivm_state *gallivm = bld_base->base.gallivm;
@@ -2517,65 +2517,68 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
        tf_base = LLVMGetParam(ctx->main_fn,
                               SI_PARAM_TESS_FACTOR_OFFSET);
        byteoffset = LLVMBuildMul(gallivm->builder, rel_patch_id,
                                  lp_build_const_int32(gallivm, 4 * stride), 
"");
 
        lp_build_if(&inner_if_ctx, gallivm,
                    LLVMBuildICmp(gallivm->builder, LLVMIntEQ,
                                  rel_patch_id, bld_base->uint_bld.zero, ""));
 
        /* Store the dynamic HS control word. */
-       ac_build_tbuffer_store_dwords(&ctx->ac, buffer,
-                                     lp_build_const_int32(gallivm, 0x80000000),
-                                     1, lp_build_const_int32(gallivm, 0), 
tf_base, 0);
+       ac_build_buffer_store_dword(&ctx->ac, buffer,
+                                   lp_build_const_int32(gallivm, 0x80000000),
+                                   1, lp_build_const_int32(gallivm, 0), 
tf_base,
+                                   0, 1, 1, 1);
 
        lp_build_endif(&inner_if_ctx);
 
        /* Store the tessellation factors. */
-       ac_build_tbuffer_store_dwords(&ctx->ac, buffer, vec0,
-                                     MIN2(stride, 4), byteoffset, tf_base, 4);
+       ac_build_buffer_store_dword(&ctx->ac, buffer, vec0,
+                                   MIN2(stride, 4), byteoffset, tf_base,
+                                   4, 1, 1, 1);
        if (vec1)
-               ac_build_tbuffer_store_dwords(&ctx->ac, buffer, vec1,
-                                             stride - 4, byteoffset, tf_base, 
20);
+               ac_build_buffer_store_dword(&ctx->ac, buffer, vec1,
+                                           stride - 4, byteoffset, tf_base,
+                                           20, 1, 1, 1);
 
        /* Store the tess factors into the offchip buffer if TES reads them. */
        if (shader->key.part.tcs.epilog.tes_reads_tess_factors) {
                LLVMValueRef buf, base, inner_vec, outer_vec, tf_outer_offset;
                LLVMValueRef tf_inner_offset;
                unsigned param_outer, param_inner;
 
                buf = ac_build_indexed_load_const(&ctx->ac, rw_buffers,
                                LLVMConstInt(ctx->i32, SI_HS_RING_TESS_OFFCHIP, 
0));
                base = LLVMGetParam(ctx->main_fn, ctx->param_oc_lds);
 
                param_outer = si_shader_io_get_unique_index(
                                      TGSI_SEMANTIC_TESSOUTER, 0);
                tf_outer_offset = get_tcs_tes_buffer_address(ctx, rel_patch_id, 
NULL,
                                        LLVMConstInt(ctx->i32, param_outer, 0));
 
                outer_vec = lp_build_gather_values(gallivm, outer,
                                                   
util_next_power_of_two(outer_comps));
 
-               ac_build_tbuffer_store_dwords(&ctx->ac, buf, outer_vec,
-                                             outer_comps, tf_outer_offset,
-                                             base, 0);
+               ac_build_buffer_store_dword(&ctx->ac, buf, outer_vec,
+                                           outer_comps, tf_outer_offset,
+                                           base, 0, 1, 1, 1);
                if (inner_comps) {
                        param_inner = si_shader_io_get_unique_index(
                                              TGSI_SEMANTIC_TESSINNER, 0);
                        tf_inner_offset = get_tcs_tes_buffer_address(ctx, 
rel_patch_id, NULL,
                                        LLVMConstInt(ctx->i32, param_inner, 0));
 
                        inner_vec = inner_comps == 1 ? inner[0] :
                                    lp_build_gather_values(gallivm, inner, 
inner_comps);
-                       ac_build_tbuffer_store_dwords(&ctx->ac, buf, inner_vec,
-                                                     inner_comps, 
tf_inner_offset,
-                                                     base, 0);
+                       ac_build_buffer_store_dword(&ctx->ac, buf, inner_vec,
+                                                   inner_comps, 
tf_inner_offset,
+                                                   base, 0, 1, 1, 1);
                }
        }
 
        lp_build_endif(&if_ctx);
 }
 
 /* This only writes the tessellation factor levels. */
 static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
@@ -2681,28 +2684,26 @@ static void si_llvm_emit_es_epilogue(struct lp_build_tgsi_context *bld_base)
                    info->output_semantic_name[i] == TGSI_SEMANTIC_LAYER)
                        continue;
 
                param_index = 
si_shader_io_get_unique_index(info->output_semantic_name[i],
                                                            
info->output_semantic_index[i]);
 
                for (chan = 0; chan < 4; chan++) {
                        LLVMValueRef out_val = LLVMBuildLoad(gallivm->builder, 
out_ptr[chan], "");
                        out_val = LLVMBuildBitCast(gallivm->builder, out_val, 
ctx->i32, "");
 
-                       ac_build_tbuffer_store(&ctx->ac,
-                                              ctx->esgs_ring,
-                                              out_val, 1,
-                                              LLVMGetUndef(ctx->i32), soffset,
-                                              (4 * param_index + chan) * 4,
-                                              V_008F0C_BUF_DATA_FORMAT_32,
-                                              V_008F0C_BUF_NUM_FORMAT_UINT,
-                                              0, 0, 1, 1, 0);
+                       ac_build_buffer_store_dword(&ctx->ac,
+                                                   ctx->esgs_ring,
+                                                   out_val, 1,
+                                                   LLVMGetUndef(ctx->i32), 
soffset,
+                                                   (4 * param_index + chan) * 
4,
+                                                   0, 1, 1);
                }
        }
 }
 
 static void si_llvm_emit_gs_epilogue(struct lp_build_tgsi_context *bld_base)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
 
        ac_emit_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_NOP | AC_SENDMSG_GS_DONE,
                        LLVMGetParam(ctx->main_fn, SI_PARAM_GS_WAVE_ID));
@@ -5049,27 +5050,25 @@ static void si_llvm_emit_vertex(
                        LLVMValueRef voffset =
                                lp_build_const_int32(gallivm, offset *
                                                     
shader->selector->gs_max_out_vertices);
                        offset++;
 
                        voffset = lp_build_add(uint, voffset, gs_next_vertex);
                        voffset = lp_build_mul_imm(uint, voffset, 4);
 
                        out_val = LLVMBuildBitCast(gallivm->builder, out_val, 
ctx->i32, "");
 
-                       ac_build_tbuffer_store(&ctx->ac,
-                                              ctx->gsvs_ring[stream],
-                                              out_val, 1,
-                                              voffset, soffset, 0,
-                                              V_008F0C_BUF_DATA_FORMAT_32,
-                                              V_008F0C_BUF_NUM_FORMAT_UINT,
-                                              1, 0, 1, 1, 0);
+                       ac_build_buffer_store_dword(&ctx->ac,
+                                                   ctx->gsvs_ring[stream],
+                                                   out_val, 1,
+                                                   voffset, soffset, 0,
+                                                   1, 1, 1);
                }
        }
 
        gs_next_vertex = lp_build_add(uint, gs_next_vertex,
                                      lp_build_const_int32(gallivm, 1));
 
        LLVMBuildStore(gallivm->builder, gs_next_vertex, 
ctx->gs_next_vertex[stream]);
 
        /* Signal vertex emission */
        ac_emit_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | 
(stream << 8),
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to