From: Marek Olšák <marek.ol...@amd.com>

---
 src/amd/common/ac_nir_to_llvm.c               | 14 +++--------
 src/gallium/drivers/radeonsi/si_shader.c      |  8 +++---
 .../drivers/radeonsi/si_shader_tgsi_mem.c     | 25 +++++++------------
 .../drivers/radeonsi/si_shader_tgsi_setup.c   | 17 ++++---------
 4 files changed, 20 insertions(+), 44 deletions(-)
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 537ac33c044..700e48e14b7 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -479,35 +479,30 @@ static LLVMValueRef emit_pack_half_2x16(struct ac_llvm_context *ctx, comp[1] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_1, ""); return LLVMBuildBitCast(ctx->builder, ac_build_cvt_pkrtz_f16(ctx, comp), ctx->i32, ""); } static LLVMValueRef emit_unpack_half_2x16(struct ac_llvm_context *ctx, LLVMValueRef src0) { LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false); - LLVMValueRef temps[2], result, val; + LLVMValueRef temps[2], val; int i; for (i = 0; i < 2; i++) { val = i == 1 ? LLVMBuildLShr(ctx->builder, src0, const16, "") : src0; val = LLVMBuildTrunc(ctx->builder, val, ctx->i16, ""); val = LLVMBuildBitCast(ctx->builder, val, ctx->f16, ""); temps[i] = LLVMBuildFPExt(ctx->builder, val, ctx->f32, ""); } - - result = LLVMBuildInsertElement(ctx->builder, LLVMGetUndef(ctx->v2f32), temps[0], - ctx->i32_0, ""); - result = LLVMBuildInsertElement(ctx->builder, result, temps[1], - ctx->i32_1, ""); - return result; + return ac_build_gather_values(ctx, temps, 2); } static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx, nir_op op, LLVMValueRef src0) { unsigned mask; int idx; LLVMValueRef result; @@ -997,24 +992,21 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0], ctx->ac.v2i32, ""); result = LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->ac.i32_1, ""); break; } case nir_op_pack_64_2x32_split: { LLVMValueRef tmp = LLVMGetUndef(ctx->ac.v2i32); - tmp = LLVMBuildInsertElement(ctx->ac.builder, tmp, - src[0], ctx->ac.i32_0, ""); - tmp = LLVMBuildInsertElement(ctx->ac.builder, tmp, - src[1], ctx->ac.i32_1, ""); + tmp = ac_build_gather_values(&ctx->ac, src, 2); result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i64, ""); break; } case nir_op_cube_face_coord: 
{ src[0] = ac_to_float(&ctx->ac, src[0]); LLVMValueRef results[2]; LLVMValueRef in[3]; for (unsigned chan = 0; chan < 3; chan++) in[chan] = ac_llvm_extract_elem(&ctx->ac, src[0], chan); diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 66fe5fad218..cfd99b61601 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2717,26 +2717,24 @@ static void emit_streamout_output(struct si_shader_context *ctx, /* Pack the output. */ LLVMValueRef vdata = NULL; switch (num_comps) { case 1: /* as i32 */ vdata = out[0]; break; case 2: /* as v2i32 */ case 3: /* as v4i32 (aligned to 4) */ + out[3] = LLVMGetUndef(ctx->i32); + /* fall through */ case 4: /* as v4i32 */ - vdata = LLVMGetUndef(LLVMVectorType(ctx->i32, util_next_power_of_two(num_comps))); - for (int j = 0; j < num_comps; j++) { - vdata = LLVMBuildInsertElement(ctx->ac.builder, vdata, out[j], - LLVMConstInt(ctx->i32, j, 0), ""); - } + vdata = ac_build_gather_values(&ctx->ac, out, util_next_power_of_two(num_comps)); break; } ac_build_buffer_store_dword(&ctx->ac, so_buffers[buf_idx], vdata, num_comps, so_write_offsets[buf_idx], ctx->i32_0, stream_out->dst_offset * 4, 1, 1, true, false); } diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c index 54a0413e464..8e0578b4d5e 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c @@ -546,50 +546,43 @@ static void store_emit_buffer(struct si_shader_context *ctx, unsigned cache_policy, bool writeonly_memory) { LLVMBuilderRef builder = ctx->ac.builder; LLVMValueRef base_data = value; LLVMValueRef base_offset = voffset; while (writemask) { int start, count; const char *intrinsic_name; - LLVMValueRef data, voff, tmp; + LLVMValueRef data, voff; u_bit_scan_consecutive_range(&writemask, &start, &count); /* Due to an LLVM limitation, split 3-element writes * into a 
2-element and a 1-element write. */ if (count == 3) { writemask |= 1 << (start + 2); count = 2; } if (count == 4) { data = base_data; intrinsic_name = "llvm.amdgcn.buffer.store.v4f32"; } else if (count == 2) { - LLVMTypeRef v2f32 = LLVMVectorType(ctx->f32, 2); - - tmp = LLVMBuildExtractElement( - builder, base_data, - LLVMConstInt(ctx->i32, start, 0), ""); - data = LLVMBuildInsertElement( - builder, LLVMGetUndef(v2f32), tmp, - ctx->i32_0, ""); - - tmp = LLVMBuildExtractElement( - builder, base_data, - LLVMConstInt(ctx->i32, start + 1, 0), ""); - data = LLVMBuildInsertElement( - builder, data, tmp, ctx->i32_1, ""); - + LLVMValueRef values[2] = { + LLVMBuildExtractElement(builder, base_data, + LLVMConstInt(ctx->i32, start, 0), ""), + LLVMBuildExtractElement(builder, base_data, + LLVMConstInt(ctx->i32, start + 1, 0), ""), + }; + + data = ac_build_gather_values(&ctx->ac, values, 2); intrinsic_name = "llvm.amdgcn.buffer.store.v2f32"; } else { assert(count == 1); data = LLVMBuildExtractElement( builder, base_data, LLVMConstInt(ctx->i32, start, 0), ""); intrinsic_name = "llvm.amdgcn.buffer.store.f32"; } voff = base_offset; diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c index 1f37b0ba37d..20164939cb7 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c @@ -298,32 +298,25 @@ get_pointer_into_array(struct si_shader_context *ctx, return LLVMBuildGEP(ctx->ac.builder, alloca, idxs, 2, ""); } LLVMValueRef si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base, LLVMTypeRef type, LLVMValueRef ptr, LLVMValueRef ptr2) { struct si_shader_context *ctx = si_shader_context(bld_base); - LLVMValueRef result; - - result = LLVMGetUndef(LLVMVectorType(ctx->i32, 2)); - - result = LLVMBuildInsertElement(ctx->ac.builder, - result, - ac_to_integer(&ctx->ac, ptr), - ctx->i32_0, ""); - result = LLVMBuildInsertElement(ctx->ac.builder, - result, - 
ac_to_integer(&ctx->ac, ptr2), - ctx->i32_1, ""); + LLVMValueRef values[2] = { + ac_to_integer(&ctx->ac, ptr), + ac_to_integer(&ctx->ac, ptr2), + }; + LLVMValueRef result = ac_build_gather_values(&ctx->ac, values, 2); return LLVMBuildBitCast(ctx->ac.builder, result, type, ""); } static LLVMValueRef emit_array_fetch(struct lp_build_tgsi_context *bld_base, unsigned File, enum tgsi_opcode_type type, struct tgsi_declaration_range range, unsigned swizzle) { struct si_shader_context *ctx = si_shader_context(bld_base); -- 2.17.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev