From: Dave Airlie <airl...@redhat.com>

This is ported from radeonsi. At least one Talos shader drops an
export because of it, which also saves some VGPR usage.

Signed-off-by: Dave Airlie <airl...@redhat.com>
---
 src/amd/common/ac_nir_to_llvm.c | 178 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 178 insertions(+)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index ab929bc..38d5359 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -5753,6 +5753,182 @@ static void ac_llvm_finalize_module(struct nir_to_llvm_context * ctx)
        LLVMDisposePassManager(passmgr);
 }
 
+#define EXP_TARGET (HAVE_LLVM >= 0x0500 ? 0 : 3) /* operand index of the export target in an export call */
+#define EXP_OUT0 (HAVE_LLVM >= 0x0500 ? 2 : 5) /* operand index of the first of the four exported values */
+
+/* Try to fold one PARAM export into a DEFAULT_VAL setting.
+ *
+ * The four operands of inst starting at EXP_OUT0 are inspected. The export
+ * qualifies only if each of them is undef or a floating-point constant equal
+ * to 0 or 1, and the first three can all be read as 0 or all be read as 1.
+ * In that case inst is erased, the first entry of
+ * outinfo->vs_output_param_offset[] equal to offset is replaced by
+ * EXP_PARAM_DEFAULT_VAL_0000 + default_val, and true is returned.
+ * Otherwise nothing is modified and false is returned.
+ *
+ * default_val: 0 = (0,0,0,0), 1 = (0,0,0,1), 2 = (1,1,1,0), 3 = (1,1,1,1).
+ * ctx is not referenced by this function.
+ *
+ * Return true if the PARAM export has been eliminated. */
+static bool
+ac_eliminate_const_output(struct nir_to_llvm_context *ctx,
+                         struct ac_vs_output_info *outinfo,
+                         LLVMValueRef inst, unsigned offset)
+{
+       unsigned i, default_val; /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL */
+       bool is_zero[4] = {}, is_one[4] = {}; /* per channel: may be read as 0 / as 1 */
+
+       for (i = 0; i < 4; i++) {
+               LLVMBool loses_info;
+               LLVMValueRef p = LLVMGetOperand(inst, EXP_OUT0 + i);
+
+               /* Only constants qualify. Undef outputs are eliminated too:
+                * undef sets both flags, so it fits either pattern below. */
+               if (LLVMIsUndef(p)) {
+                       is_zero[i] = true;
+                       is_one[i] = true;
+               } else if (LLVMIsAConstantFP(p)) {
+                       double a = LLVMConstRealGetDouble(p, &loses_info); /* loses_info is not examined */
+
+                       if (a == 0)
+                               is_zero[i] = true;
+                       else if (a == 1)
+                               is_one[i] = true;
+                       else

+                               return false; /* other constant */
+               } else
+                       return false; /* not a constant at all */
+       }
+
+       /* Only certain combinations of 0 and 1 can be eliminated.
+        * After the loop every channel is 0, 1 or undef, so a fourth
+        * channel that is not zero is necessarily one. */
+       if (is_zero[0] && is_zero[1] && is_zero[2])
+               default_val = is_zero[3] ? 0 : 1;
+       else if (is_one[0] && is_one[1] && is_one[2])
+               default_val = is_zero[3] ? 2 : 3;
+       else
+               return false;
+
+       /* The PARAM export can be represented as DEFAULT_VAL. Kill it. */
+       LLVMInstructionEraseFromParent(inst);
+
+       /* Change OFFSET to DEFAULT_VAL. Only the first slot whose stored
+        * offset matches is rewritten. */
+       for (i = 0; i < VARYING_SLOT_MAX; i++) {
+               if (outinfo->vs_output_param_offset[i] == offset) {
+                       outinfo->vs_output_param_offset[i] =
+                               EXP_PARAM_DEFAULT_VAL_0000 + default_val;
+                       break;
+               }
+       }
+       return true;
+}
+
+struct si_vs_exports { /* PARAM exports that were found but not eliminated */
+       unsigned num; /* number of entries in use in the two arrays below */
+       unsigned offset[VARYING_SLOT_MAX]; /* PARAM index (target - V_008DFC_SQ_EXP_PARAM) of each kept export */
+       LLVMValueRef inst[VARYING_SLOT_MAX]; /* the export call instruction for the same entry */
+};
+/* Return true if the LLVM value kind of v is LLVMFunctionValueKind.
+ * Used to check a call's callee before querying its name and parameters. */
+static bool
+ac_is_function(LLVMValueRef v)
+{
+       return LLVMGetValueKind(v) == LLVMFunctionValueKind;
+}
+/* Drop PARAM exports whose values are constant and close the resulting gaps.
+ *
+ * Returns without doing anything for fragment, compute, tess-control and
+ * geometry stages, for vertex shaders keyed as_ls or as_es, and for
+ * tess-eval shaders when the as_es key bit tested below is set.
+ *
+ * Otherwise every instruction of ctx->main_function is visited. Calls to a
+ * function named "llvm.SI.export" or "llvm.amdgcn.exp.f32" with 8 or 9
+ * parameters and a target of at least V_008DFC_SQ_EXP_PARAM are handed to
+ * ac_eliminate_const_output(). Exports that survive are recorded, and if
+ * at least one export was removed the survivors are renumbered from 0 in
+ * the order they were found; outinfo->vs_output_param_offset[] and
+ * outinfo->param_exports are updated to match.
+ */
+static void
+ac_eliminate_const_vs_outputs(struct nir_to_llvm_context *ctx)
+{
+       LLVMBasicBlockRef bb;
+       struct si_vs_exports exports;
+       bool removed_any = false;
+       struct ac_vs_output_info *outinfo; /* assigned only in the VERTEX and TESS_EVAL branches below */
+       exports.num = 0;
+
+       if (ctx->stage == MESA_SHADER_FRAGMENT ||
+           ctx->stage == MESA_SHADER_COMPUTE ||
+           ctx->stage == MESA_SHADER_TESS_CTRL ||
+           ctx->stage == MESA_SHADER_GEOMETRY)
+               return;
+
+       if (ctx->stage == MESA_SHADER_VERTEX) {
+               if (ctx->options->key.vs.as_ls ||
+                   ctx->options->key.vs.as_es)
+                       return;
+               outinfo = &ctx->shader_info->vs.outinfo;
+       }
+
+       if (ctx->stage == MESA_SHADER_TESS_EVAL) {
+               if (ctx->options->key.vs.as_es) /* NOTE(review): reads the vs key in the TES branch -- confirm this is the intended field */
+                       return;
+               outinfo = &ctx->shader_info->tes.outinfo;
+       }
+
+       /* Process all LLVM instructions of the main function, one basic
+        * block at a time. */
+       bb = LLVMGetFirstBasicBlock(ctx->main_function);
+       while (bb) {
+               LLVMValueRef inst = LLVMGetFirstInstruction(bb);
+
+               while (inst) {
+                       LLVMValueRef cur = inst;
+                       inst = LLVMGetNextInstruction(inst); /* step first: cur may be erased further down */
+
+                       if (LLVMGetInstructionOpcode(cur) != LLVMCall)
+                               continue;
+
+                       LLVMValueRef callee = LLVMGetCalledValue(cur);
+
+                       if (!ac_is_function(callee))
+                               continue;
+
+                       const char *name = LLVMGetValueName(callee);
+                       unsigned num_args = LLVMCountParams(callee);
+
+                       /* Check if this is an export instruction: the callee
+                        * must take 8 or 9 parameters and carry one of the
+                        * two export intrinsic names. */
+                       if ((num_args != 9 && num_args != 8) ||
+                           (strcmp(name, "llvm.SI.export") &&
+                            strcmp(name, "llvm.amdgcn.exp.f32")))
+                               continue;
+
+                       LLVMValueRef arg = LLVMGetOperand(cur, EXP_TARGET);
+                       unsigned target = LLVMConstIntGetZExtValue(arg); /* assumes the target operand is a constant integer -- TODO confirm */
+
+                       if (target < V_008DFC_SQ_EXP_PARAM)
+                               continue; /* targets below the PARAM range are left alone */
+
+                       target -= V_008DFC_SQ_EXP_PARAM; /* now a PARAM index */
+
+                       /* Eliminate constant value PARAM exports. Exports
+                        * that cannot be eliminated are remembered so they
+                        * can be renumbered afterwards. */
+                       if (ac_eliminate_const_output(ctx, outinfo, cur, 
target)) {
+                               removed_any = true;
+                       } else {
+                               exports.offset[exports.num] = target; /* NOTE(review): exports.num is not checked against VARYING_SLOT_MAX here */
+                               exports.inst[exports.num] = cur;
+                               exports.num++;
+                       }
+               }
+               bb = LLVMGetNextBasicBlock(bb);
+       }
+
+       /* Remove holes in export memory due to removed PARAM exports.
+        * This is done by renumbering all PARAM exports.
+        * Nothing is touched when no export was removed.
+        */
+       if (removed_any) {
+               ubyte current_offset[VARYING_SLOT_MAX];
+               unsigned new_count = 0;
+               unsigned out, i;
+
+               /* Make a copy of the offsets. We need the old version while
+                * we are modifying some of them. The assert checks that the
+                * copy has the same size as the array it is taken from. */
+               assert(sizeof(current_offset) ==
+                      sizeof(outinfo->vs_output_param_offset));
+               memcpy(current_offset, outinfo->vs_output_param_offset,
+                      sizeof(current_offset));
+
+               for (i = 0; i < exports.num; i++) {
+                       unsigned offset = exports.offset[i];
+
+                       for (out = 0; out < VARYING_SLOT_MAX; out++) {
+                               if (current_offset[out] != offset)
+                                       continue;
+
+                               LLVMSetOperand(exports.inst[i], EXP_TARGET,
+                                              LLVMConstInt(ctx->i32,
+                                                           
V_008DFC_SQ_EXP_PARAM + new_count, 0));
+                               outinfo->vs_output_param_offset[out] = 
new_count;
+                               new_count++;
+                               break; /* only the first slot with this old offset is renumbered */
+                       }
+               }
+               outinfo->param_exports = new_count; /* number of exports that were renumbered */
+       }
+}
+
 static void
 ac_setup_rings(struct nir_to_llvm_context *ctx)
 {
@@ -5890,6 +6066,8 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
        LLVMBuildRetVoid(ctx.builder);
 
        ac_llvm_finalize_module(&ctx);
+
+       ac_eliminate_const_vs_outputs(&ctx);
        free(ctx.locals);
        ralloc_free(ctx.defs);
        ralloc_free(ctx.phis);
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to