From: Marek Olšák <marek.ol...@amd.com>

si_llvm_emit_ddxy is called once per element, so we don't have to generate
code for 4 elements at once.
---
 src/gallium/drivers/radeonsi/si_shader.c | 80 ++++++++++++--------------------
 1 file changed, 29 insertions(+), 51 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 414810e..c150ae4 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -4998,98 +4998,76 @@ static void si_llvm_emit_txqs( #define TID_MASK_TOP 0xfffffffd #define TID_MASK_LEFT 0xfffffffe static void si_llvm_emit_ddxy( const struct lp_build_tgsi_action *action, struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data) { struct si_shader_context *ctx = si_shader_context(bld_base); struct gallivm_state *gallivm = bld_base->base.gallivm; - const struct tgsi_full_instruction *inst = emit_data->inst; - unsigned opcode = inst->Instruction.Opcode; - LLVMValueRef store_ptr, load_ptr0, load_ptr1, thread_id; - LLVMValueRef tl, trbl, result[4]; - LLVMValueRef tl_tid, trbl_tid; - unsigned swizzle[4]; - unsigned c; + unsigned opcode = emit_data->info->opcode; + LLVMValueRef thread_id, tl, trbl, tl_tid, trbl_tid, val, args[2]; int idx; unsigned mask; bool has_ds_bpermute = HAVE_LLVM >= 0x0309 && ctx->screen->b.chip_class >= VI; - thread_id = get_thread_id(ctx);; + thread_id = get_thread_id(ctx); if (opcode == TGSI_OPCODE_DDX_FINE) mask = TID_MASK_LEFT; else if (opcode == TGSI_OPCODE_DDY_FINE) mask = TID_MASK_TOP; else mask = TID_MASK_TOP_LEFT; tl_tid = LLVMBuildAnd(gallivm->builder, thread_id, lp_build_const_int32(gallivm, mask), ""); /* for DDX we want to next X pixel, DDY next Y pixel. */ idx = (opcode == TGSI_OPCODE_DDX || opcode == TGSI_OPCODE_DDX_FINE) ? 
1 : 2; trbl_tid = LLVMBuildAdd(gallivm->builder, tl_tid, lp_build_const_int32(gallivm, idx), ""); - if (!has_ds_bpermute) { - store_ptr = build_gep0(ctx, ctx->lds, thread_id); - load_ptr0 = build_gep0(ctx, ctx->lds, tl_tid); - load_ptr1 = build_gep0(ctx, ctx->lds, trbl_tid); - } - - for (c = 0; c < 4; ++c) { - unsigned i; - LLVMValueRef val; - LLVMValueRef args[2]; - - swizzle[c] = tgsi_util_get_full_src_register_swizzle(&inst->Src[0], c); - for (i = 0; i < c; ++i) { - if (swizzle[i] == swizzle[c]) { - result[c] = result[i]; - break; - } - } - if (i != c) - continue; - - val = LLVMBuildBitCast(gallivm->builder, - lp_build_emit_fetch(bld_base, inst, 0, c), - ctx->i32, ""); + val = LLVMBuildBitCast(gallivm->builder, emit_data->args[0], ctx->i32, ""); - if (has_ds_bpermute) { - args[0] = LLVMBuildMul(gallivm->builder, tl_tid, - lp_build_const_int32(gallivm, 4), ""); - args[1] = val; - tl = lp_build_intrinsic(gallivm->builder, + if (has_ds_bpermute) { + args[0] = LLVMBuildMul(gallivm->builder, tl_tid, + lp_build_const_int32(gallivm, 4), ""); + args[1] = val; + tl = lp_build_intrinsic(gallivm->builder, "llvm.amdgcn.ds.bpermute", ctx->i32, args, 2, LLVMReadNoneAttribute); - args[0] = LLVMBuildMul(gallivm->builder, trbl_tid, - lp_build_const_int32(gallivm, 4), ""); - trbl = lp_build_intrinsic(gallivm->builder, - "llvm.amdgcn.ds.bpermute", ctx->i32, - args, 2, LLVMReadNoneAttribute); - } else { - LLVMBuildStore(gallivm->builder, val, store_ptr); - tl = LLVMBuildLoad(gallivm->builder, load_ptr0, ""); - trbl = LLVMBuildLoad(gallivm->builder, load_ptr1, ""); - } - tl = LLVMBuildBitCast(gallivm->builder, tl, ctx->f32, ""); - trbl = LLVMBuildBitCast(gallivm->builder, trbl, ctx->f32, ""); - result[c] = LLVMBuildFSub(gallivm->builder, trbl, tl, ""); + args[0] = LLVMBuildMul(gallivm->builder, trbl_tid, + lp_build_const_int32(gallivm, 4), ""); + trbl = lp_build_intrinsic(gallivm->builder, + "llvm.amdgcn.ds.bpermute", ctx->i32, + args, 2, LLVMReadNoneAttribute); + } else { + 
LLVMValueRef store_ptr, load_ptr0, load_ptr1; + + store_ptr = build_gep0(ctx, ctx->lds, thread_id); + load_ptr0 = build_gep0(ctx, ctx->lds, tl_tid); + load_ptr1 = build_gep0(ctx, ctx->lds, trbl_tid); + + LLVMBuildStore(gallivm->builder, val, store_ptr); + tl = LLVMBuildLoad(gallivm->builder, load_ptr0, ""); + trbl = LLVMBuildLoad(gallivm->builder, load_ptr1, ""); } - emit_data->output[0] = lp_build_gather_values(gallivm, result, 4); + tl = LLVMBuildBitCast(gallivm->builder, tl, ctx->f32, ""); + trbl = LLVMBuildBitCast(gallivm->builder, trbl, ctx->f32, ""); + + emit_data->output[emit_data->chan] = + LLVMBuildFSub(gallivm->builder, trbl, tl, ""); } /* * this takes an I,J coordinate pair, * and works out the X and Y derivatives. * it returns DDX(I), DDX(J), DDY(I), DDY(J). */ static LLVMValueRef si_llvm_emit_ddxy_interp( struct lp_build_tgsi_context *bld_base, LLVMValueRef interp_ij) -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev