From: Marek Olšák <marek.ol...@amd.com> --- src/amd/common/ac_llvm_build.c | 62 +++++++++---------------- src/amd/common/ac_llvm_build.h | 34 ++++---------- src/amd/common/ac_nir_to_llvm.c | 16 +++---- src/gallium/drivers/radeonsi/si_shader.c | 79 ++++++++++++++++---------------- 4 files changed, 77 insertions(+), 114 deletions(-)
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index 34085bb..cc1eaf1 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -536,87 +536,69 @@ ac_build_indexed_load_const(struct ac_llvm_context *ctx, LLVMValueRef result = ac_build_indexed_load(ctx, base_ptr, index, true); LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md); return result; } /* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4. * The type of vdata must be one of i32 (num_channels=1), v2i32 (num_channels=2), * or v4i32 (num_channels=3,4). */ void -ac_build_tbuffer_store(struct ac_llvm_context *ctx, - LLVMValueRef rsrc, - LLVMValueRef vdata, - unsigned num_channels, - LLVMValueRef vaddr, - LLVMValueRef soffset, - unsigned inst_offset, - unsigned dfmt, - unsigned nfmt, - unsigned offen, - unsigned idxen, - unsigned glc, - unsigned slc, - unsigned tfe) +ac_build_buffer_store_dword(struct ac_llvm_context *ctx, + LLVMValueRef rsrc, + LLVMValueRef vdata, + unsigned num_channels, + LLVMValueRef vaddr, + LLVMValueRef soffset, + unsigned inst_offset, + bool offen, + bool glc, + bool slc) { + static unsigned dfmt[] = { + V_008F0C_BUF_DATA_FORMAT_32, + V_008F0C_BUF_DATA_FORMAT_32_32, + V_008F0C_BUF_DATA_FORMAT_32_32_32, + V_008F0C_BUF_DATA_FORMAT_32_32_32_32 + }; + assert(num_channels >= 1 && num_channels <= 4); + LLVMValueRef args[] = { rsrc, vdata, LLVMConstInt(ctx->i32, num_channels, 0), vaddr, soffset, LLVMConstInt(ctx->i32, inst_offset, 0), - LLVMConstInt(ctx->i32, dfmt, 0), - LLVMConstInt(ctx->i32, nfmt, 0), + LLVMConstInt(ctx->i32, dfmt[num_channels - 1], 0), + LLVMConstInt(ctx->i32, V_008F0C_BUF_NUM_FORMAT_UINT, 0), LLVMConstInt(ctx->i32, offen, 0), - LLVMConstInt(ctx->i32, idxen, 0), + LLVMConstInt(ctx->i32, 0, 0), /* idxen */ LLVMConstInt(ctx->i32, glc, 0), LLVMConstInt(ctx->i32, slc, 0), - LLVMConstInt(ctx->i32, tfe, 0) + LLVMConstInt(ctx->i32, 0, 0), /* tfe*/ }; /* The instruction offset field has 12 bits */ assert(offen || inst_offset < (1 << 12)); /* The intrinsic is overloaded, we need to add a type suffix for overloading to work. */ unsigned func = CLAMP(num_channels, 1, 3) - 1; const char *types[] = {"i32", "v2i32", "v4i32"}; char name[256]; snprintf(name, sizeof(name), "llvm.SI.tbuffer.store.%s", types[func]); ac_emit_llvm_intrinsic(ctx, name, ctx->voidt, args, ARRAY_SIZE(args), AC_FUNC_ATTR_LEGACY); } -void -ac_build_tbuffer_store_dwords(struct ac_llvm_context *ctx, - LLVMValueRef rsrc, - LLVMValueRef vdata, - unsigned num_channels, - LLVMValueRef vaddr, - LLVMValueRef soffset, - unsigned inst_offset) -{ - static unsigned dfmt[] = { - V_008F0C_BUF_DATA_FORMAT_32, - V_008F0C_BUF_DATA_FORMAT_32_32, - V_008F0C_BUF_DATA_FORMAT_32_32_32, - V_008F0C_BUF_DATA_FORMAT_32_32_32_32 - }; - assert(num_channels >= 1 && num_channels <= 4); - - ac_build_tbuffer_store(ctx, rsrc, vdata, num_channels, vaddr, soffset, - inst_offset, dfmt[num_channels - 1], - V_008F0C_BUF_NUM_FORMAT_UINT, 1, 0, 1, 1, 0); -} - LLVMValueRef ac_build_buffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc, int num_channels, LLVMValueRef vindex, LLVMValueRef voffset, LLVMValueRef soffset, unsigned inst_offset, unsigned glc, unsigned slc, diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h index e7773d7..65a9a05 100644 --- a/src/amd/common/ac_llvm_build.h +++ b/src/amd/common/ac_llvm_build.h @@ -115,44 +115,30 @@ ac_build_indexed_store(struct ac_llvm_context *ctx, LLVMValueRef ac_build_indexed_load(struct ac_llvm_context *ctx, LLVMValueRef base_ptr, LLVMValueRef index, bool uniform); LLVMValueRef ac_build_indexed_load_const(struct ac_llvm_context *ctx, LLVMValueRef base_ptr, LLVMValueRef index); void -ac_build_tbuffer_store_dwords(struct ac_llvm_context *ctx, - LLVMValueRef rsrc, - LLVMValueRef vdata, - unsigned num_channels, - LLVMValueRef vaddr, - LLVMValueRef soffset, - unsigned inst_offset); - -void -ac_build_tbuffer_store(struct ac_llvm_context *ctx, - LLVMValueRef rsrc, - LLVMValueRef vdata, - unsigned num_channels, - LLVMValueRef vaddr, - LLVMValueRef soffset, - unsigned inst_offset, - unsigned dfmt, - unsigned nfmt, - unsigned offen, - unsigned idxen, - unsigned glc, - unsigned slc, - unsigned tfe); - +ac_build_buffer_store_dword(struct ac_llvm_context *ctx, + LLVMValueRef rsrc, + LLVMValueRef vdata, + unsigned num_channels, + LLVMValueRef vaddr, + LLVMValueRef soffset, + unsigned inst_offset, + bool offen, + bool glc, + bool slc); LLVMValueRef ac_build_buffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc, int num_channels, LLVMValueRef vindex, LLVMValueRef voffset, LLVMValueRef soffset, unsigned inst_offset, unsigned glc, unsigned slc, diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 30d48aa..4143b3c 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -3131,26 +3131,24 @@ visit_emit_vertex(struct nir_to_llvm_context *ctx, } for (unsigned j = 0; j < length; j++) { LLVMValueRef out_val = LLVMBuildLoad(ctx->builder, out_ptr[j], ""); LLVMValueRef voffset = LLVMConstInt(ctx->i32, (slot * 4 + j + start) * ctx->gs_max_out_vertices, false); voffset = LLVMBuildAdd(ctx->builder, voffset, gs_next_vertex, ""); voffset = LLVMBuildMul(ctx->builder, voffset, LLVMConstInt(ctx->i32, 4, false), ""); out_val = LLVMBuildBitCast(ctx->builder, out_val, ctx->i32, ""); - ac_build_tbuffer_store(&ctx->ac, ctx->gsvs_ring, - out_val, 1, - voffset, ctx->gs2vs_offset, 0, - V_008F0C_BUF_DATA_FORMAT_32, - V_008F0C_BUF_NUM_FORMAT_UINT, - 1, 0, 1, 1, 0); + ac_build_buffer_store_dword(&ctx->ac, ctx->gsvs_ring, + out_val, 1, + voffset, ctx->gs2vs_offset, 0, + 1, 1, 1); } idx += slot_inc; } gs_next_vertex = LLVMBuildAdd(ctx->builder, gs_next_vertex, ctx->i32one, ""); LLVMBuildStore(ctx->builder, gs_next_vertex, ctx->gs_next_vertex); ac_emit_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (0 << 8), ctx->gs_wave_id); } @@ -4631,28 +4629,26 @@ handle_es_outputs_post(struct nir_to_llvm_context *ctx) } param_index = shader_io_get_unique_index(i); if (param_index > max_output_written) max_output_written = param_index; for (j = 0; j < length; j++) { LLVMValueRef out_val = LLVMBuildLoad(ctx->builder, out_ptr[j], ""); out_val = LLVMBuildBitCast(ctx->builder, out_val, ctx->i32, ""); - ac_build_tbuffer_store(&ctx->ac, + ac_build_buffer_store_dword(&ctx->ac, ctx->esgs_ring, out_val, 1, LLVMGetUndef(ctx->i32), ctx->es2gs_offset, (4 * param_index + j + start) * 4, - V_008F0C_BUF_DATA_FORMAT_32, - V_008F0C_BUF_NUM_FORMAT_UINT, - 0, 0, 1, 1, 0); + 0, 1, 1); } } ctx->shader_info->vs.esgs_itemsize = (max_output_written + 1) * 16; } static void si_export_mrt_color(struct nir_to_llvm_context *ctx, LLVMValueRef *color, unsigned param, bool is_last) { LLVMValueRef args[9]; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 699fefd..daaf9f1 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1040,31 +1040,31 @@ static void store_output_tcs(struct lp_build_tgsi_context *bld_base, value = ac_emit_clamp(&ctx->ac, value); /* Skip LDS stores if there is no LDS read of this output. */ if (!skip_lds_store) lds_store(bld_base, chan_index, dw_addr, value); value = LLVMBuildBitCast(gallivm->builder, value, ctx->i32, ""); values[chan_index] = value; if (inst->Dst[0].Register.WriteMask != 0xF && !is_tess_factor) { - ac_build_tbuffer_store_dwords(&ctx->ac, buffer, value, 1, - buf_addr, base, - 4 * chan_index); + ac_build_buffer_store_dword(&ctx->ac, buffer, value, 1, + buf_addr, base, + 4 * chan_index, 1, 1, 1); } } if (inst->Dst[0].Register.WriteMask == 0xF && !is_tess_factor) { LLVMValueRef value = lp_build_gather_values(bld_base->base.gallivm, values, 4); - ac_build_tbuffer_store_dwords(&ctx->ac, buffer, value, 4, buf_addr, - base, 0); + ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buf_addr, + base, 0, 1, 1, 1); } } static LLVMValueRef fetch_input_gs( struct lp_build_tgsi_context *bld_base, const struct tgsi_full_src_register *reg, enum tgsi_opcode_type type, unsigned swizzle) { struct lp_build_context *base = &bld_base->base; @@ -2076,25 +2076,25 @@ static void emit_streamout_output(struct si_shader_context *ctx, case 3: /* as v4i32 (aligned to 4) */ case 4: /* as v4i32 */ vdata = LLVMGetUndef(LLVMVectorType(ctx->i32, util_next_power_of_two(num_comps))); for (int j = 0; j < num_comps; j++) { vdata = LLVMBuildInsertElement(builder, vdata, out[j], LLVMConstInt(ctx->i32, j, 0), ""); } break; } - ac_build_tbuffer_store_dwords(&ctx->ac, so_buffers[buf_idx], - vdata, num_comps, - so_write_offsets[buf_idx], - LLVMConstInt(ctx->i32, 0, 0), - stream_out->dst_offset * 4); + ac_build_buffer_store_dword(&ctx->ac, so_buffers[buf_idx], + vdata, num_comps, + so_write_offsets[buf_idx], + LLVMConstInt(ctx->i32, 0, 0), + stream_out->dst_offset * 4, 1, 1, 1); } /** * Write streamout data to buffers for vertex stream @p stream (different * vertex streams can occur for GS copy shaders). */ static void si_llvm_emit_streamout(struct si_shader_context *ctx, struct si_shader_output_values *outputs, unsigned noutput, unsigned stream) { @@ -2404,22 +2404,22 @@ static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base) ""); LLVMValueRef buffer_addr = get_tcs_tes_buffer_address(ctx, get_rel_patch_id(ctx), invocation_id, lp_build_const_int32(gallivm, i)); LLVMValueRef value = lds_load(bld_base, TGSI_TYPE_SIGNED, ~0, lds_ptr); - ac_build_tbuffer_store_dwords(&ctx->ac, buffer, value, 4, buffer_addr, - buffer_offset, 0); + ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buffer_addr, + buffer_offset, 0, 1, 1, 1); } } static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base, LLVMValueRef rel_patch_id, LLVMValueRef invocation_id, LLVMValueRef tcs_out_current_patch_data_offset) { struct si_shader_context *ctx = si_shader_context(bld_base); struct gallivm_state *gallivm = bld_base->base.gallivm; @@ -2517,65 +2517,68 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base, tf_base = LLVMGetParam(ctx->main_fn, SI_PARAM_TESS_FACTOR_OFFSET); byteoffset = LLVMBuildMul(gallivm->builder, rel_patch_id, lp_build_const_int32(gallivm, 4 * stride), ""); lp_build_if(&inner_if_ctx, gallivm, LLVMBuildICmp(gallivm->builder, LLVMIntEQ, rel_patch_id, bld_base->uint_bld.zero, "")); /* Store the dynamic HS control word. */ - ac_build_tbuffer_store_dwords(&ctx->ac, buffer, - lp_build_const_int32(gallivm, 0x80000000), - 1, lp_build_const_int32(gallivm, 0), tf_base, 0); + ac_build_buffer_store_dword(&ctx->ac, buffer, + lp_build_const_int32(gallivm, 0x80000000), + 1, lp_build_const_int32(gallivm, 0), tf_base, + 0, 1, 1, 1); lp_build_endif(&inner_if_ctx); /* Store the tessellation factors. */ - ac_build_tbuffer_store_dwords(&ctx->ac, buffer, vec0, - MIN2(stride, 4), byteoffset, tf_base, 4); + ac_build_buffer_store_dword(&ctx->ac, buffer, vec0, + MIN2(stride, 4), byteoffset, tf_base, + 4, 1, 1, 1); if (vec1) - ac_build_tbuffer_store_dwords(&ctx->ac, buffer, vec1, - stride - 4, byteoffset, tf_base, 20); + ac_build_buffer_store_dword(&ctx->ac, buffer, vec1, + stride - 4, byteoffset, tf_base, + 20, 1, 1, 1); /* Store the tess factors into the offchip buffer if TES reads them. */ if (shader->key.part.tcs.epilog.tes_reads_tess_factors) { LLVMValueRef buf, base, inner_vec, outer_vec, tf_outer_offset; LLVMValueRef tf_inner_offset; unsigned param_outer, param_inner; buf = ac_build_indexed_load_const(&ctx->ac, rw_buffers, LLVMConstInt(ctx->i32, SI_HS_RING_TESS_OFFCHIP, 0)); base = LLVMGetParam(ctx->main_fn, ctx->param_oc_lds); param_outer = si_shader_io_get_unique_index( TGSI_SEMANTIC_TESSOUTER, 0); tf_outer_offset = get_tcs_tes_buffer_address(ctx, rel_patch_id, NULL, LLVMConstInt(ctx->i32, param_outer, 0)); outer_vec = lp_build_gather_values(gallivm, outer, util_next_power_of_two(outer_comps)); - ac_build_tbuffer_store_dwords(&ctx->ac, buf, outer_vec, - outer_comps, tf_outer_offset, - base, 0); + ac_build_buffer_store_dword(&ctx->ac, buf, outer_vec, + outer_comps, tf_outer_offset, + base, 0, 1, 1, 1); if (inner_comps) { param_inner = si_shader_io_get_unique_index( TGSI_SEMANTIC_TESSINNER, 0); tf_inner_offset = get_tcs_tes_buffer_address(ctx, rel_patch_id, NULL, LLVMConstInt(ctx->i32, param_inner, 0)); inner_vec = inner_comps == 1 ? inner[0] : lp_build_gather_values(gallivm, inner, inner_comps); - ac_build_tbuffer_store_dwords(&ctx->ac, buf, inner_vec, - inner_comps, tf_inner_offset, - base, 0); + ac_build_buffer_store_dword(&ctx->ac, buf, inner_vec, + inner_comps, tf_inner_offset, + base, 0, 1, 1, 1); } } lp_build_endif(&if_ctx); } /* This only writes the tessellation factor levels. */ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base) { struct si_shader_context *ctx = si_shader_context(bld_base); @@ -2681,28 +2684,26 @@ static void si_llvm_emit_es_epilogue(struct lp_build_tgsi_context *bld_base) info->output_semantic_name[i] == TGSI_SEMANTIC_LAYER) continue; param_index = si_shader_io_get_unique_index(info->output_semantic_name[i], info->output_semantic_index[i]); for (chan = 0; chan < 4; chan++) { LLVMValueRef out_val = LLVMBuildLoad(gallivm->builder, out_ptr[chan], ""); out_val = LLVMBuildBitCast(gallivm->builder, out_val, ctx->i32, ""); - ac_build_tbuffer_store(&ctx->ac, - ctx->esgs_ring, - out_val, 1, - LLVMGetUndef(ctx->i32), soffset, - (4 * param_index + chan) * 4, - V_008F0C_BUF_DATA_FORMAT_32, - V_008F0C_BUF_NUM_FORMAT_UINT, - 0, 0, 1, 1, 0); + ac_build_buffer_store_dword(&ctx->ac, + ctx->esgs_ring, + out_val, 1, + LLVMGetUndef(ctx->i32), soffset, + (4 * param_index + chan) * 4, + 0, 1, 1); } } } static void si_llvm_emit_gs_epilogue(struct lp_build_tgsi_context *bld_base) { struct si_shader_context *ctx = si_shader_context(bld_base); ac_emit_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_NOP | AC_SENDMSG_GS_DONE, LLVMGetParam(ctx->main_fn, SI_PARAM_GS_WAVE_ID)); @@ -5049,27 +5050,25 @@ static void si_llvm_emit_vertex( LLVMValueRef voffset = lp_build_const_int32(gallivm, offset * shader->selector->gs_max_out_vertices); offset++; voffset = lp_build_add(uint, voffset, gs_next_vertex); voffset = lp_build_mul_imm(uint, voffset, 4); out_val = LLVMBuildBitCast(gallivm->builder, out_val, ctx->i32, ""); - ac_build_tbuffer_store(&ctx->ac, - ctx->gsvs_ring[stream], - out_val, 1, - voffset, soffset, 0, - V_008F0C_BUF_DATA_FORMAT_32, - V_008F0C_BUF_NUM_FORMAT_UINT, - 1, 0, 1, 1, 0); + ac_build_buffer_store_dword(&ctx->ac, + ctx->gsvs_ring[stream], + out_val, 1, + voffset, soffset, 0, + 1, 1, 1); } } gs_next_vertex = lp_build_add(uint, gs_next_vertex, lp_build_const_int32(gallivm, 1)); LLVMBuildStore(gallivm->builder, gs_next_vertex, ctx->gs_next_vertex[stream]); /* Signal vertex emission */ ac_emit_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8), -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev