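From: Marek Olšák <marek.ol...@amd.com> Add ac_lds_store_writeonly(), a wrapper around ac_build_store for LDS stores that do not need to be volatile, and use it for the LS and GFX9 ES output stores; TCS output stores stay volatile. I like the writeonly wrapper more than using ac_build_store directly. --- src/amd/common/ac_llvm_build.c | 6 ++++++ src/amd/common/ac_llvm_build.h | 2 ++ src/amd/common/ac_nir_to_llvm.c | 4 ++-- src/gallium/drivers/radeonsi/si_shader.c | 15 ++++++++++----- 4 files changed, 20 insertions(+), 7 deletions(-)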
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index a85ffe1..4c9beda 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -1771,20 +1771,26 @@ LLVMValueRef ac_lds_load_volatile(struct ac_llvm_context *ctx, { return ac_build_load_custom(ctx, ctx->lds, dw_addr, false, false, true); } void ac_lds_store_volatile(struct ac_llvm_context *ctx, LLVMValueRef dw_addr, LLVMValueRef value) { ac_build_store(ctx, ctx->lds, dw_addr, ac_to_integer(ctx, value), true); } +void ac_lds_store_writeonly(struct ac_llvm_context *ctx, + LLVMValueRef dw_addr, LLVMValueRef value) +{ + ac_build_store(ctx, ctx->lds, dw_addr, ac_to_integer(ctx, value), false); +} + LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx, LLVMTypeRef dst_type, LLVMValueRef src0) { LLVMValueRef params[2] = { src0, /* The value of 1 means that ffs(x=0) = undef, so LLVM won't * add special code to check for x=0. The reason is that * the LLVM behavior for x=0 is different from what we diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h index e3f716e..25a540a 100644 --- a/src/amd/common/ac_llvm_build.h +++ b/src/amd/common/ac_llvm_build.h @@ -288,19 +288,21 @@ void ac_optimize_vs_outputs(struct ac_llvm_context *ac, uint8_t *vs_output_param_offset, uint32_t num_outputs, uint8_t *num_param_exports); void ac_init_exec_full_mask(struct ac_llvm_context *ctx); void ac_declare_lds_as_pointer(struct ac_llvm_context *ac); LLVMValueRef ac_lds_load_volatile(struct ac_llvm_context *ctx, LLVMValueRef dw_addr); void ac_lds_store_volatile(struct ac_llvm_context *ctx, LLVMValueRef dw_addr, LLVMValueRef value); +void ac_lds_store_writeonly(struct ac_llvm_context *ctx, + LLVMValueRef dw_addr, LLVMValueRef value); LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx, LLVMTypeRef dst_type, LLVMValueRef src0); #ifdef __cplusplus } #endif #endif diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index fa30b91..3f41b9f 100644 --- 
a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -5841,21 +5841,21 @@ handle_es_outputs_post(struct nir_to_llvm_context *ctx, if (lds_base) { dw_addr = LLVMBuildAdd(ctx->builder, lds_base, LLVMConstInt(ctx->ac.i32, param_index * 4, false), ""); } for (j = 0; j < length; j++) { LLVMValueRef out_val = LLVMBuildLoad(ctx->builder, out_ptr[j], ""); out_val = LLVMBuildBitCast(ctx->builder, out_val, ctx->ac.i32, ""); if (ctx->ac.chip_class >= GFX9) { - ac_lds_store_volatile(&ctx->ac, dw_addr, + ac_lds_store_writeonly(&ctx->ac, dw_addr, LLVMBuildLoad(ctx->builder, out_ptr[j], "")); dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, ctx->ac.i32_1, ""); } else { ac_build_buffer_store_dword(&ctx->ac, ctx->esgs_ring, out_val, 1, NULL, ctx->es2gs_offset, (4 * param_index + j) * 4, 1, 1, true, true); } @@ -5881,21 +5881,21 @@ handle_ls_outputs_post(struct nir_to_llvm_context *ctx) if (i == VARYING_SLOT_CLIP_DIST0) length = ctx->num_output_clips + ctx->num_output_culls; int param = shader_io_get_unique_index(i); mark_tess_output(ctx, false, param); if (length > 4) mark_tess_output(ctx, false, param + 1); LLVMValueRef dw_addr = LLVMBuildAdd(ctx->builder, base_dw_addr, LLVMConstInt(ctx->ac.i32, param * 4, false), ""); for (unsigned j = 0; j < length; j++) { - ac_lds_store_volatile(&ctx->ac, dw_addr, + ac_lds_store_writeonly(&ctx->ac, dw_addr, LLVMBuildLoad(ctx->builder, out_ptr[j], "")); dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, ctx->ac.i32_1, ""); } } } struct ac_build_if_state { struct nir_to_llvm_context *ctx; LLVMValueRef condition; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index ec4cf89..98ac914 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1110,28 +1110,31 @@ static LLVMValueRef lds_load(struct lp_build_tgsi_context *bld_base, /** * Store to LDS. 
* * \param swizzle offset (typically 0..3) * \param dw_addr address in dwords * \param value value to store */ static void lds_store(struct lp_build_tgsi_context *bld_base, unsigned dw_offset_imm, LLVMValueRef dw_addr, - LLVMValueRef value) + LLVMValueRef value, bool Volatile) { struct si_shader_context *ctx = si_shader_context(bld_base); dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr, LLVMConstInt(ctx->i32, dw_offset_imm, 0)); - ac_lds_store_volatile(&ctx->ac, dw_addr, value); + if (Volatile) + ac_lds_store_volatile(&ctx->ac, dw_addr, value); + else + ac_lds_store_writeonly(&ctx->ac, dw_addr, value); } static LLVMValueRef desc_from_addr_base64k(struct si_shader_context *ctx, unsigned param) { LLVMBuilderRef builder = ctx->ac.builder; LLVMValueRef addr = LLVMGetParam(ctx->main_fn, param); addr = LLVMBuildZExt(builder, addr, ctx->i64, ""); addr = LLVMBuildShl(builder, addr, LLVMConstInt(ctx->i64, 16, 0), ""); @@ -1260,21 +1263,21 @@ static void store_output_tcs(struct lp_build_tgsi_context *bld_base, uint32_t writemask = reg->Register.WriteMask; while (writemask) { chan_index = u_bit_scan(&writemask); LLVMValueRef value = dst[chan_index]; if (inst->Instruction.Saturate) value = ac_build_clamp(&ctx->ac, value); /* Skip LDS stores if there is no LDS read of this output. 
*/ if (!skip_lds_store) - lds_store(bld_base, chan_index, dw_addr, value); + lds_store(bld_base, chan_index, dw_addr, value, true); value = ac_to_integer(&ctx->ac, value); values[chan_index] = value; if (reg->Register.WriteMask != 0xF && !is_tess_factor) { ac_build_buffer_store_dword(&ctx->ac, buffer, value, 1, buf_addr, base, 4 * chan_index, 1, 0, true, false); } @@ -3179,21 +3182,22 @@ static void si_llvm_emit_ls_epilogue(struct lp_build_tgsi_context *bld_base) if (name == TGSI_SEMANTIC_LAYER || name == TGSI_SEMANTIC_VIEWPORT_INDEX) continue; int param = si_shader_io_get_unique_index(name, index); LLVMValueRef dw_addr = LLVMBuildAdd(ctx->ac.builder, base_dw_addr, LLVMConstInt(ctx->i32, param * 4, 0), ""); for (chan = 0; chan < 4; chan++) { lds_store(bld_base, chan, dw_addr, - LLVMBuildLoad(ctx->ac.builder, out_ptr[chan], "")); + LLVMBuildLoad(ctx->ac.builder, out_ptr[chan], ""), + false); } } if (ctx->screen->b.chip_class >= GFX9) si_set_ls_return_value_for_tcs(ctx); } static void si_llvm_emit_es_epilogue(struct lp_build_tgsi_context *bld_base) { struct si_shader_context *ctx = si_shader_context(bld_base); @@ -3226,21 +3230,22 @@ static void si_llvm_emit_es_epilogue(struct lp_build_tgsi_context *bld_base) param = si_shader_io_get_unique_index(info->output_semantic_name[i], info->output_semantic_index[i]); for (chan = 0; chan < 4; chan++) { LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, out_ptr[chan], ""); out_val = ac_to_integer(&ctx->ac, out_val); /* GFX9 has the ESGS ring in LDS. */ if (ctx->screen->b.chip_class >= GFX9) { - lds_store(bld_base, param * 4 + chan, lds_base, out_val); + lds_store(bld_base, param * 4 + chan, lds_base, out_val, + false); continue; } ac_build_buffer_store_dword(&ctx->ac, ctx->esgs_ring, out_val, 1, NULL, soffset, (4 * param + chan) * 4, 1, 1, true, true); } } -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev