From: Dave Airlie <airl...@redhat.com> This is ported from radeonsi. At least one Talos shader drops an export because of this change, which also saves some VGPR usage.
Signed-off-by: Dave Airlie <airl...@redhat.com> --- src/amd/common/ac_nir_to_llvm.c | 178 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 178 insertions(+) diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index ab929bc..38d5359 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -5753,6 +5753,182 @@ static void ac_llvm_finalize_module(struct nir_to_llvm_context * ctx) LLVMDisposePassManager(passmgr); } +#define EXP_TARGET (HAVE_LLVM >= 0x0500 ? 0 : 3) /* operand index of the export target in an export call; chosen by HAVE_LLVM */ +#define EXP_OUT0 (HAVE_LLVM >= 0x0500 ? 2 : 5) /* operand index of the first of the four exported values; chosen by HAVE_LLVM */ + +/* Try to eliminate one PARAM export call. inst is the export call and offset is its PARAM index (the caller in this patch passes the export target minus V_008DFC_SQ_EXP_PARAM). Each of the four operands starting at EXP_OUT0 must be undef or a floating-point constant equal to 0 or 1, and the first three must be all 0 or all 1; otherwise nothing is modified and false is returned. On success the call is erased and the first entry of outinfo->vs_output_param_offset that equals offset is rewritten to EXP_PARAM_DEFAULT_VAL_0000 + default_val. ctx is not referenced by this function. Return true if the PARAM export has been eliminated. */ +static bool +ac_eliminate_const_output(struct nir_to_llvm_context *ctx, + struct ac_vs_output_info *outinfo, + LLVMValueRef inst, unsigned offset) +{ + unsigned i, default_val; /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL */ + bool is_zero[4] = {}, is_one[4] = {}; + + for (i = 0; i < 4; i++) { + LLVMBool loses_info; + LLVMValueRef p = LLVMGetOperand(inst, EXP_OUT0 + i); + + /* It's a constant expression. Undef outputs are eliminated too. */ + if (LLVMIsUndef(p)) { /* undef sets both flags, so it can match either the all-0 or the all-1 pattern */ + is_zero[i] = true; + is_one[i] = true; + } else if (LLVMIsAConstantFP(p)) { + double a = LLVMConstRealGetDouble(p, &loses_info); + + if (a == 0) + is_zero[i] = true; + else if (a == 1) + is_one[i] = true; + else + + return false; /* other constant */ + } else + return false; /* not a constant at all: the export must stay */ + } + + /* Only certain combinations of 0 and 1 can be eliminated. As computed below, default_val 0 stands for (0,0,0,0), 1 for (0,0,0,1), 2 for (1,1,1,0) and 3 for (1,1,1,1). Because an undef component has is_zero set, an undef fourth component selects 0 or 2, and the all-0 test is tried before the all-1 test. */ + if (is_zero[0] && is_zero[1] && is_zero[2]) + default_val = is_zero[3] ? 0 : 1; + else if (is_one[0] && is_one[1] && is_one[2]) + default_val = is_zero[3] ? 2 : 3; + else + return false; + + /* The PARAM export can be represented as DEFAULT_VAL. Kill it. */ + LLVMInstructionEraseFromParent(inst); + + /* Change OFFSET to DEFAULT_VAL. 
*/ + for (i = 0; i < VARYING_SLOT_MAX; i++) { + if (outinfo->vs_output_param_offset[i] == offset) { + outinfo->vs_output_param_offset[i] = + EXP_PARAM_DEFAULT_VAL_0000 + default_val; + break; /* only the first matching slot is rewritten */ + } + } + return true; +} + /* PARAM export calls that were not eliminated: inst[k] is the call and offset[k] is its PARAM index as read before renumbering; num is the number of valid entries. */ +struct si_vs_exports { + unsigned num; + unsigned offset[VARYING_SLOT_MAX]; + LLVMValueRef inst[VARYING_SLOT_MAX]; +}; + /* Return true when the LLVM value kind of v is LLVMFunctionValueKind. */ +static bool +ac_is_function(LLVMValueRef v) +{ + return LLVMGetValueKind(v) == LLVMFunctionValueKind; +} + /* Scan every instruction of ctx->main_function for calls to llvm.SI.export or llvm.amdgcn.exp.f32 whose target is at or above V_008DFC_SQ_EXP_PARAM. Calls accepted by ac_eliminate_const_output are erased; if any were erased, the remaining PARAM exports are renumbered consecutively from 0 and outinfo->param_exports is set to the new count. Returns without doing anything for the fragment, compute, tess-ctrl and geometry stages and when the per-stage key checks below are set. NOTE(review): outinfo is assigned only in the VERTEX and TESS_EVAL branches; a stage that is neither in the early-return list nor one of those two would use it uninitialized - confirm no such stage reaches this function, or restructure as an else-if chain. */ +static void +ac_eliminate_const_vs_outputs(struct nir_to_llvm_context *ctx) +{ + LLVMBasicBlockRef bb; + struct si_vs_exports exports; + bool removed_any = false; + struct ac_vs_output_info *outinfo; + exports.num = 0; /* only num is initialized; offset[] and inst[] entries are written before they are read */ + + if (ctx->stage == MESA_SHADER_FRAGMENT || + ctx->stage == MESA_SHADER_COMPUTE || + ctx->stage == MESA_SHADER_TESS_CTRL || + ctx->stage == MESA_SHADER_GEOMETRY) + return; + + if (ctx->stage == MESA_SHADER_VERTEX) { + if (ctx->options->key.vs.as_ls || + ctx->options->key.vs.as_es) + return; + outinfo = &ctx->shader_info->vs.outinfo; + } + + if (ctx->stage == MESA_SHADER_TESS_EVAL) { /* NOTE(review): this branch handles TESS_EVAL but tests key.vs.as_es - confirm whether key.tes.as_es was intended */ + if (ctx->options->key.vs.as_es) + return; + outinfo = &ctx->shader_info->tes.outinfo; + } + + /* Process all LLVM instructions. */ + bb = LLVMGetFirstBasicBlock(ctx->main_function); + while (bb) { + LLVMValueRef inst = LLVMGetFirstInstruction(bb); + + while (inst) { + LLVMValueRef cur = inst; + inst = LLVMGetNextInstruction(inst); /* advance first: cur may be erased by ac_eliminate_const_output below */ + + if (LLVMGetInstructionOpcode(cur) != LLVMCall) + continue; + + LLVMValueRef callee = LLVMGetCalledValue(cur); + + if (!ac_is_function(callee)) + continue; + + const char *name = LLVMGetValueName(callee); + unsigned num_args = LLVMCountParams(callee); + + /* Check if this is an export instruction. 
*/ + if ((num_args != 9 && num_args != 8) || + (strcmp(name, "llvm.SI.export") && + strcmp(name, "llvm.amdgcn.exp.f32"))) + continue; + + LLVMValueRef arg = LLVMGetOperand(cur, EXP_TARGET); + unsigned target = LLVMConstIntGetZExtValue(arg); /* NOTE(review): assumes the target operand is always a constant integer - confirm */ + + if (target < V_008DFC_SQ_EXP_PARAM) /* targets below the PARAM base are left untouched */ + continue; + + target -= V_008DFC_SQ_EXP_PARAM; /* from here on target is the PARAM index */ + + /* Eliminate constant value PARAM exports. */ + if (ac_eliminate_const_output(ctx, outinfo, cur, target)) { + removed_any = true; + } else { /* kept: remember the call and its old index for the renumbering pass */ + exports.offset[exports.num] = target; + exports.inst[exports.num] = cur; + exports.num++; + } + } + bb = LLVMGetNextBasicBlock(bb); + } + + /* Remove holes in export memory due to removed PARAM exports. + * This is done by renumbering all PARAM exports. + */ + if (removed_any) { + ubyte current_offset[VARYING_SLOT_MAX]; + unsigned new_count = 0; + unsigned out, i; + + /* Make a copy of the offsets. We need the old version while + * we are modifying some of them. */ + assert(sizeof(current_offset) == + sizeof(outinfo->vs_output_param_offset)); + memcpy(current_offset, outinfo->vs_output_param_offset, + sizeof(current_offset)); + + for (i = 0; i < exports.num; i++) { + unsigned offset = exports.offset[i]; + /* Find the first output slot that used this old offset; if none matches, the export keeps its original target and is not counted. */ + for (out = 0; out < VARYING_SLOT_MAX; out++) { + if (current_offset[out] != offset) + continue; + + LLVMSetOperand(exports.inst[i], EXP_TARGET, + LLVMConstInt(ctx->i32, + V_008DFC_SQ_EXP_PARAM + new_count, 0)); + outinfo->vs_output_param_offset[out] = new_count; + new_count++; + break; + } + } + outinfo->param_exports = new_count; + } +} + static void ac_setup_rings(struct nir_to_llvm_context *ctx) { @@ -5890,6 +6066,8 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, LLVMBuildRetVoid(ctx.builder); ac_llvm_finalize_module(&ctx); + + ac_eliminate_const_vs_outputs(&ctx); free(ctx.locals); ralloc_free(ctx.defs); ralloc_free(ctx.phis); -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org 
https://lists.freedesktop.org/mailman/listinfo/mesa-dev