v2: do not force enable IDXEN when unnecessary Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com> --- src/amd/common/ac_llvm_build.c | 111 ++++++++++++++++++++++++++++++++ src/amd/common/ac_llvm_build.h | 26 ++++++++ src/amd/common/ac_nir_to_llvm.c | 26 ++------ 3 files changed, 142 insertions(+), 21 deletions(-)
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index 8d5682f6f7a..06dc1383121 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -1554,6 +1554,117 @@ ac_build_llvm8_tbuffer_load(struct ac_llvm_context *ctx, ac_get_load_intr_attribs(can_speculate)); } +static void +ac_build_llvm8_tbuffer_store(struct ac_llvm_context *ctx, + LLVMValueRef rsrc, + LLVMValueRef vdata, + LLVMValueRef vindex, + LLVMValueRef voffset, + LLVMValueRef soffset, + unsigned num_channels, + unsigned dfmt, + unsigned nfmt, + bool glc, + bool slc, + bool writeonly_memory, + bool structurized) +{ + LLVMValueRef args[7]; + int idx = 0; + args[idx++] = vdata; + args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""); + if (structurized) + args[idx++] = vindex ? vindex : ctx->i32_0; + args[idx++] = voffset ? voffset : ctx->i32_0; + args[idx++] = soffset ? soffset : ctx->i32_0; + args[idx++] = LLVMConstInt(ctx->i32, dfmt | (nfmt << 4), 0); + args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0); + unsigned func = CLAMP(num_channels, 1, 3) - 1; + + const char *type_names[] = {"i32", "v2i32", "v4i32"}; + const char *indexing_kind = structurized ? "struct" : "raw"; + char name[256]; + + snprintf(name, sizeof(name), "llvm.amdgcn.%s.tbuffer.store.%s", + indexing_kind, type_names[func]); + + ac_build_intrinsic(ctx, name, ctx->voidt, args, idx, + ac_get_store_intr_attribs(writeonly_memory)); +} + +void +ac_build_tbuffer_store(struct ac_llvm_context *ctx, + LLVMValueRef rsrc, + LLVMValueRef vdata, + LLVMValueRef vindex, + LLVMValueRef voffset, + LLVMValueRef soffset, + LLVMValueRef immoffset, + unsigned num_channels, + unsigned dfmt, + unsigned nfmt, + bool glc, + bool slc, + bool writeonly_memory) +{ + if (HAVE_LLVM >= 0x800) { + bool structurized = vindex && vindex != ctx->i32_0; + + voffset = LLVMBuildAdd(ctx->builder, + voffset ? 
voffset : ctx->i32_0, + immoffset, ""); + + ac_build_llvm8_tbuffer_store(ctx, rsrc, vdata, vindex, voffset, + soffset, num_channels, dfmt, nfmt, + glc, slc, writeonly_memory, + structurized); + } else { + LLVMValueRef params[] = { + vdata, + rsrc, + vindex, + voffset ? voffset : ctx->i32_0, + soffset ? soffset : ctx->i32_0, + immoffset, + LLVMConstInt(ctx->i32, dfmt, false), + LLVMConstInt(ctx->i32, nfmt, false), + LLVMConstInt(ctx->i32, glc, false), + LLVMConstInt(ctx->i32, slc, false), + }; + unsigned func = CLAMP(num_channels, 1, 3) - 1; + const char *type_names[] = {"i32", "v2i32", "v4i32"}; + char name[256]; + + snprintf(name, sizeof(name), "llvm.amdgcn.tbuffer.store.%s", + type_names[func]); + + ac_build_intrinsic(ctx, name, ctx->voidt, params, 10, + ac_get_store_intr_attribs(writeonly_memory)); + } +} + +void +ac_build_tbuffer_store_short(struct ac_llvm_context *ctx, + LLVMValueRef rsrc, + LLVMValueRef vdata, + LLVMValueRef vindex, + LLVMValueRef voffset, + LLVMValueRef soffset, + bool glc, + bool slc, + bool writeonly_memory) +{ + unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_16; + unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT; + + vdata = LLVMBuildBitCast(ctx->builder, vdata, ctx->i16, ""); + vdata = LLVMBuildZExt(ctx->builder, vdata, ctx->i32, ""); + + ac_build_tbuffer_store(ctx, rsrc, vdata, vindex, voffset, soffset, + ctx->i32_0, 1, dfmt, nfmt, glc, slc, + writeonly_memory); +} + /** * Set range metadata on an instruction. This can only be used on load and * call instructions. 
If you know an instruction can only produce the values diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h index 069ba7aa3c9..5ca93e66982 100644 --- a/src/amd/common/ac_llvm_build.h +++ b/src/amd/common/ac_llvm_build.h @@ -331,6 +331,32 @@ ac_build_llvm8_tbuffer_load(struct ac_llvm_context *ctx, bool can_speculate, bool structurized); +void +ac_build_tbuffer_store_short(struct ac_llvm_context *ctx, + LLVMValueRef rsrc, + LLVMValueRef vdata, + LLVMValueRef vindex, + LLVMValueRef voffset, + LLVMValueRef soffset, + bool glc, + bool slc, + bool writeonly_memory); + +void +ac_build_tbuffer_store(struct ac_llvm_context *ctx, + LLVMValueRef rsrc, + LLVMValueRef vdata, + LLVMValueRef vindex, + LLVMValueRef voffset, + LLVMValueRef soffset, + LLVMValueRef immoffset, + unsigned num_channels, + unsigned dfmt, + unsigned nfmt, + bool glc, + bool slc, + bool writeonly_memory); + LLVMValueRef ac_get_thread_id(struct ac_llvm_context *ctx); diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index f4d408cd587..f2070eb9a8e 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -1521,14 +1521,12 @@ static unsigned get_cache_policy(struct ac_nir_context *ctx, static void visit_store_ssbo(struct ac_nir_context *ctx, nir_intrinsic_instr *instr) { - const char *store_name; LLVMValueRef src_data = get_src(ctx, instr->src[0]); int elem_size_bytes = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src_data)) / 8; unsigned writemask = nir_intrinsic_write_mask(instr); enum gl_access_qualifier access = nir_intrinsic_access(instr); bool writeonly_memory = access & ACCESS_NON_READABLE; unsigned cache_policy = get_cache_policy(ctx, access, false, writeonly_memory); - LLVMValueRef glc = (cache_policy & ac_glc) ? 
ctx->ac.i1true : ctx->ac.i1false; LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi, get_src(ctx, instr->src[1]), true); @@ -1571,25 +1569,11 @@ static void visit_store_ssbo(struct ac_nir_context *ctx, LLVMConstInt(ctx->ac.i32, start * elem_size_bytes, false), ""); if (num_bytes == 2) { - store_name = "llvm.amdgcn.tbuffer.store.i32"; - data_type = ctx->ac.i32; - data = LLVMBuildBitCast(ctx->ac.builder, data, ctx->ac.i16, ""); - data = LLVMBuildZExt(ctx->ac.builder, data, data_type, ""); - LLVMValueRef tbuffer_params[] = { - data, - rsrc, - ctx->ac.i32_0, /* vindex */ - offset, /* voffset */ - ctx->ac.i32_0, - ctx->ac.i32_0, - LLVMConstInt(ctx->ac.i32, 2, false), // dfmt (= 16bit) - LLVMConstInt(ctx->ac.i32, 4, false), // nfmt (= uint) - glc, - ctx->ac.i1false, - }; - ac_build_intrinsic(&ctx->ac, store_name, - ctx->ac.voidt, tbuffer_params, 10, - ac_get_store_intr_attribs(writeonly_memory)); + ac_build_tbuffer_store_short(&ctx->ac, rsrc, data, + ctx->ac.i32_0, offset, + ctx->ac.i32_0, + cache_policy & ac_glc, + false, writeonly_memory); } else { int num_channels = num_bytes / 4; -- 2.21.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev