On Apr 27, 2017 10:50 AM, "Juan A. Suarez Romero" <jasua...@igalia.com> wrote:
On Wed, 2017-04-26 at 09:12 +1000, Dave Airlie wrote: > From: Dave Airlie <airl...@redhat.com> > > This code can be shared by radv, we bump the max to > VARYING_SLOT_MAX here, but that shouldn't have too > much fallout. > > Signed-off-by: Dave Airlie <airl...@redhat.com> > --- > src/amd/common/ac_exp_param.h | 40 ++++++ > src/amd/common/ac_llvm_build.c | 156 +++++++++++++++++++++++- > src/amd/common/ac_llvm_build.h | 6 + > src/amd/common/ac_llvm_helper.cpp | 20 +++ > src/amd/common/ac_llvm_util.h | 2 + > src/gallium/drivers/radeonsi/si_shader.c | 152 ++--------------------- > src/gallium/drivers/radeonsi/si_shader.h | 12 -- > src/gallium/drivers/radeonsi/si_state_shaders.c | 13 +- > 8 files changed, 237 insertions(+), 164 deletions(-) > create mode 100644 src/amd/common/ac_exp_param.h > > diff --git a/src/amd/common/ac_exp_param.h b/src/amd/common/ac_exp_param.h > new file mode 100644 > index 0000000..b97ce81 > --- /dev/null > +++ b/src/amd/common/ac_exp_param.h > @@ -0,0 +1,40 @@ > +/* > + * Copyright 2014 Advanced Micro Devices, Inc. > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the > + * "Software"), to deal in the Software without restriction, including > + * without limitation the rights to use, copy, modify, merge, publish, > + * distribute, sub license, and/or sell copies of the Software, and to > + * permit persons to whom the Software is furnished to do so, subject to > + * the following conditions: > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL > + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, > + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR > + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE > + * USE OR OTHER DEALINGS IN THE SOFTWARE. > + * > + * The above copyright notice and this permission notice (including the > + * next paragraph) shall be included in all copies or substantial portions > + * of the Software. > + * > + */ > +#ifndef AC_EXP_PARAM_H > +#define AC_EXP_PARAM_H > + > +enum { > + /* SPI_PS_INPUT_CNTL_i.OFFSET[0:4] */ > + AC_EXP_PARAM_OFFSET_0 = 0, > + AC_EXP_PARAM_OFFSET_31 = 31, > + /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL[0:1] */ > + AC_EXP_PARAM_DEFAULT_VAL_0000 = 64, > + AC_EXP_PARAM_DEFAULT_VAL_0001, > + AC_EXP_PARAM_DEFAULT_VAL_1110, > + AC_EXP_PARAM_DEFAULT_VAL_1111, > + AC_EXP_PARAM_UNDEFINED = 255, > +}; > + > +#endif > diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c > index d45094c..f452f3e 100644 > --- a/src/amd/common/ac_llvm_build.c > +++ b/src/amd/common/ac_llvm_build.c > @@ -33,11 +33,13 @@ > #include <stdio.h> > > #include "ac_llvm_util.h" > - > +#include "ac_exp_param.h" > #include "util/bitscan.h" > #include "util/macros.h" > #include "sid.h" > > +#include "shader_enums.h" > + > /* Initialize module-independent parts of the context. > * > * The caller is responsible for initializing ctx::module and ctx::builder. > @@ -1244,3 +1246,155 @@ void ac_get_image_intr_name(const char *base_name, > data_type_name, coords_type_name, rsrc_type_name); > } > } > + > +#define AC_EXP_TARGET (HAVE_LLVM >= 0x0500 ? 0 : 3) > +#define AC_EXP_OUT0 (HAVE_LLVM >= 0x0500 ? 2 : 5) > + > +/* Return true if the PARAM export has been eliminated. 
*/ > +static bool ac_eliminate_const_output(uint8_t *vs_output_param_offset, > + uint32_t num_outputs, > + LLVMValueRef inst, unsigned offset) > +{ > + unsigned i, default_val; /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL */ > + bool is_zero[4] = {}, is_one[4] = {}; > + > + for (i = 0; i < 4; i++) { > + LLVMBool loses_info; > + LLVMValueRef p = LLVMGetOperand(inst, AC_EXP_OUT0 + i); > + > + /* It's a constant expression. Undef outputs are eliminated too. */ > + if (LLVMIsUndef(p)) { > + is_zero[i] = true; > + is_one[i] = true; > + } else if (LLVMIsAConstantFP(p)) { > + double a = LLVMConstRealGetDouble(p, &loses_info); > + > + if (a == 0) > + is_zero[i] = true; > + else if (a == 1) > + is_one[i] = true; > + else > + return false; /* other constant */ > + } else > + return false; > + } > + > + /* Only certain combinations of 0 and 1 can be eliminated. */ > + if (is_zero[0] && is_zero[1] && is_zero[2]) > + default_val = is_zero[3] ? 0 : 1; > + else if (is_one[0] && is_one[1] && is_one[2]) > + default_val = is_zero[3] ? 2 : 3; > + else > + return false; > + > + /* The PARAM export can be represented as DEFAULT_VAL. Kill it. */ > + LLVMInstructionEraseFromParent(inst); > + > + /* Change OFFSET to DEFAULT_VAL. */ > + for (i = 0; i < num_outputs; i++) { > + if (vs_output_param_offset[i] == offset) { > + vs_output_param_offset[i] = > + AC_EXP_PARAM_DEFAULT_VAL_0000 + default_val; > + break; > + } > + } > + return true; > +} > + > +struct ac_vs_exports { > + unsigned num; > + unsigned offset[VARYING_SLOT_MAX]; > + LLVMValueRef inst[VARYING_SLOT_MAX]; > +}; > + > +void ac_eliminate_const_vs_outputs(struct ac_llvm_context *ctx, > + LLVMValueRef main_fn, > + uint8_t *vs_output_param_offset, > + uint32_t num_outputs, > + uint8_t *num_param_exports) > +{ > + LLVMBasicBlockRef bb; > + bool removed_any = false; > + struct ac_vs_exports exports; > + > + assert(num_outputs < VARYING_SLOT_MAX); > + exports.num = 0; > + > + /* Process all LLVM instructions. 
*/ > + bb = LLVMGetFirstBasicBlock(main_fn); > + while (bb) { > + LLVMValueRef inst = LLVMGetFirstInstruction(bb); > + > + while (inst) { > + LLVMValueRef cur = inst; > + inst = LLVMGetNextInstruction(inst); > + > + if (LLVMGetInstructionOpcode(cur) != LLVMCall) > + continue; > + > + LLVMValueRef callee = ac_llvm_get_called_value(cur); > + > + if (!ac_llvm_is_function(callee)) > + continue; > + > + const char *name = LLVMGetValueName(callee); > + unsigned num_args = LLVMCountParams(callee); > + > + /* Check if this is an export instruction. */ > + if ((num_args != 9 && num_args != 8) || > + (strcmp(name, "llvm.SI.export") && > + strcmp(name, "llvm.amdgcn.exp.f32"))) > + continue; > + > + LLVMValueRef arg = LLVMGetOperand(cur, AC_EXP_TARGET); > + unsigned target = LLVMConstIntGetZExtValue(arg); > + > + if (target < V_008DFC_SQ_EXP_PARAM) > + continue; > + > + target -= V_008DFC_SQ_EXP_PARAM; > + > + /* Eliminate constant value PARAM exports. */ > + if (ac_eliminate_const_output(vs_output_param_offset, > + num_outputs, cur, target)) { > + removed_any = true; > + } else { > + exports.offset[exports.num] = target; > + exports.inst[exports.num] = cur; > + exports.num++; > + } > + } > + bb = LLVMGetNextBasicBlock(bb); > + } > + > + /* Remove holes in export memory due to removed PARAM exports. > + * This is done by renumbering all PARAM exports. > + */ > + if (removed_any) { > + uint8_t current_offset[VARYING_SLOT_MAX]; > + unsigned new_count = 0; > + unsigned out, i; > + > + /* Make a copy of the offsets. We need the old version while > + * we are modifying some of them. 
*/ > + memcpy(current_offset, vs_output_param_offset, > + sizeof(current_offset)); > + > + for (i = 0; i < exports.num; i++) { > + unsigned offset = exports.offset[i]; > + > + for (out = 0; out < num_outputs; out++) { > + if (current_offset[out] != offset) > + continue; > + > + LLVMSetOperand(exports.inst[i], AC_EXP_TARGET, > + LLVMConstInt(ctx->i32, > + V_008DFC_SQ_EXP_PARAM + new_count, 0)); > + vs_output_param_offset[out] = new_count; > + new_count++; > + break; > + } > + } > + *num_param_exports = new_count; > + } > +} > diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h > index d6edcde..1c3610a 100644 > --- a/src/amd/common/ac_llvm_build.h > +++ b/src/amd/common/ac_llvm_build.h > @@ -239,6 +239,12 @@ void ac_get_image_intr_name(const char *base_name, > LLVMTypeRef coords_type, > LLVMTypeRef rsrc_type, > char *out_name, unsigned out_len); > + > +void ac_eliminate_const_vs_outputs(struct ac_llvm_context *ac, > + LLVMValueRef main_fn, > + uint8_t *vs_output_param_offset, > + uint32_t num_outputs, > + uint8_t *num_param_exports); > #ifdef __cplusplus > } > #endif > diff --git a/src/amd/common/ac_llvm_helper.cpp b/src/amd/common/ac_llvm_helper.cpp > index 11fa809..582a8f7 100644 > --- a/src/amd/common/ac_llvm_helper.cpp > +++ b/src/amd/common/ac_llvm_helper.cpp > @@ -61,3 +61,23 @@ bool ac_is_sgpr_param(LLVMValueRef arg) > return AS.hasAttribute(ArgNo + 1, llvm::Attribute::ByVal) || > AS.hasAttribute(ArgNo + 1, llvm::Attribute::InReg); > } > + > +LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call) > +{ > +#if HAVE_LLVM >= 0x0309 > + return LLVMGetCalledValue(call); > +#elif HAVE_LLVM >= 0x0305 > + return llvm::wrap(llvm::CallSite(llvm::unwrap<llvm::Instruction>(call)).getCalledValue()); This patch is breaking build when using LLVM 3.8.1. Can you share the build error with us? BTW, I have a patch that removes support for LLVM 3.8. Marek > +#else > + return NULL; /* radeonsi doesn't support so old LLVM. 
*/ > +#endif > +} > + > +bool ac_llvm_is_function(LLVMValueRef v) > +{ > +#if HAVE_LLVM >= 0x0309 > + return LLVMGetValueKind(v) == LLVMFunctionValueKind; > +#else > + return llvm::isa<llvm::Function>(llvm::unwrap(v)); > +#endif > +} > diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h > index faecf1e..38e7dde 100644 > --- a/src/amd/common/ac_llvm_util.h > +++ b/src/amd/common/ac_llvm_util.h > @@ -64,6 +64,8 @@ void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function, > unsigned attrib_mask); > void ac_dump_module(LLVMModuleRef module); > > +LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call); > +bool ac_llvm_is_function(LLVMValueRef v); > #ifdef __cplusplus > } > #endif > diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c > index 5d7175d..27d88b1 100644 > --- a/src/gallium/drivers/radeonsi/si_shader.c > +++ b/src/gallium/drivers/radeonsi/si_shader.c > @@ -41,6 +41,7 @@ > > #include "ac_binary.h" > #include "ac_llvm_util.h" > +#include "ac_exp_param.h" > #include "si_shader_internal.h" > #include "si_pipe.h" > #include "sid.h" > @@ -6793,76 +6794,10 @@ static void si_init_shader_ctx(struct si_shader_context *ctx, > bld_base->op_actions[TGSI_OPCODE_BARRIER].emit = si_llvm_emit_barrier; > } > > -#define EXP_TARGET (HAVE_LLVM >= 0x0500 ? 0 : 3) > -#define EXP_OUT0 (HAVE_LLVM >= 0x0500 ? 2 : 5) > - > -/* Return true if the PARAM export has been eliminated. */ > -static bool si_eliminate_const_output(struct si_shader_context *ctx, > - LLVMValueRef inst, unsigned offset) > -{ > - struct si_shader *shader = ctx->shader; > - unsigned num_outputs = shader->selector->info.num_outputs; > - unsigned i, default_val; /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL */ > - bool is_zero[4] = {}, is_one[4] = {}; > - > - for (i = 0; i < 4; i++) { > - LLVMBool loses_info; > - LLVMValueRef p = LLVMGetOperand(inst, EXP_OUT0 + i); > - > - /* It's a constant expression. Undef outputs are eliminated too. 
*/ > - if (LLVMIsUndef(p)) { > - is_zero[i] = true; > - is_one[i] = true; > - } else if (LLVMIsAConstantFP(p)) { > - double a = LLVMConstRealGetDouble(p, &loses_info); > - > - if (a == 0) > - is_zero[i] = true; > - else if (a == 1) > - is_one[i] = true; > - else > - return false; /* other constant */ > - } else > - return false; > - } > - > - /* Only certain combinations of 0 and 1 can be eliminated. */ > - if (is_zero[0] && is_zero[1] && is_zero[2]) > - default_val = is_zero[3] ? 0 : 1; > - else if (is_one[0] && is_one[1] && is_one[2]) > - default_val = is_zero[3] ? 2 : 3; > - else > - return false; > - > - /* The PARAM export can be represented as DEFAULT_VAL. Kill it. */ > - LLVMInstructionEraseFromParent(inst); > - > - /* Change OFFSET to DEFAULT_VAL. */ > - for (i = 0; i < num_outputs; i++) { > - if (shader->info.vs_output_param_offset[i] == offset) { > - shader->info.vs_output_param_offset[i] = > - EXP_PARAM_DEFAULT_VAL_0000 + default_val; > - break; > - } > - } > - return true; > -} > - > -struct si_vs_exports { > - unsigned num; > - unsigned offset[SI_MAX_VS_OUTPUTS]; > - LLVMValueRef inst[SI_MAX_VS_OUTPUTS]; > -}; > - > static void si_eliminate_const_vs_outputs(struct si_shader_context *ctx) > { > struct si_shader *shader = ctx->shader; > struct tgsi_shader_info *info = &shader->selector->info; > - LLVMBasicBlockRef bb; > - struct si_vs_exports exports; > - bool removed_any = false; > - > - exports.num = 0; > > if (ctx->type == PIPE_SHADER_FRAGMENT || > ctx->type == PIPE_SHADER_COMPUTE || > @@ -6870,84 +6805,11 @@ static void si_eliminate_const_vs_outputs(struct si_shader_context *ctx) > shader->key.as_ls) > return; > > - /* Process all LLVM instructions. 
*/ > - bb = LLVMGetFirstBasicBlock(ctx->main_fn); > - while (bb) { > - LLVMValueRef inst = LLVMGetFirstInstruction(bb); > - > - while (inst) { > - LLVMValueRef cur = inst; > - inst = LLVMGetNextInstruction(inst); > - > - if (LLVMGetInstructionOpcode(cur) != LLVMCall) > - continue; > - > - LLVMValueRef callee = lp_get_called_value(cur); > - > - if (!lp_is_function(callee)) > - continue; > - > - const char *name = LLVMGetValueName(callee); > - unsigned num_args = LLVMCountParams(callee); > - > - /* Check if this is an export instruction. */ > - if ((num_args != 9 && num_args != 8) || > - (strcmp(name, "llvm.SI.export") && > - strcmp(name, "llvm.amdgcn.exp.f32"))) > - continue; > - > - LLVMValueRef arg = LLVMGetOperand(cur, EXP_TARGET); > - unsigned target = LLVMConstIntGetZExtValue(arg); > - > - if (target < V_008DFC_SQ_EXP_PARAM) > - continue; > - > - target -= V_008DFC_SQ_EXP_PARAM; > - > - /* Eliminate constant value PARAM exports. */ > - if (si_eliminate_const_output(ctx, cur, target)) { > - removed_any = true; > - } else { > - exports.offset[exports.num] = target; > - exports.inst[exports.num] = cur; > - exports.num++; > - } > - } > - bb = LLVMGetNextBasicBlock(bb); > - } > - > - /* Remove holes in export memory due to removed PARAM exports. > - * This is done by renumbering all PARAM exports. > - */ > - if (removed_any) { > - ubyte current_offset[SI_MAX_VS_OUTPUTS]; > - unsigned new_count = 0; > - unsigned out, i; > - > - /* Make a copy of the offsets. We need the old version while > - * we are modifying some of them. 
*/ > - assert(sizeof(current_offset) == > - sizeof(shader->info.vs_output_param_offset)); > - memcpy(current_offset, shader->info.vs_output_param_offset, > - sizeof(current_offset)); > - > - for (i = 0; i < exports.num; i++) { > - unsigned offset = exports.offset[i]; > - > - for (out = 0; out < info->num_outputs; out++) { > - if (current_offset[out] != offset) > - continue; > - > - LLVMSetOperand(exports.inst[i], EXP_TARGET, > - LLVMConstInt(ctx->i32, > - V_008DFC_SQ_EXP_PARAM + new_count, 0)); > - shader->info.vs_output_param_offset[out] = new_count; > - new_count++; > - break; > - } > - } > - shader->info.nr_param_exports = new_count; > - } > + ac_eliminate_const_vs_outputs(&ctx->ac, > + ctx->main_fn, > + shader->info.vs_output_param_offset, > + info->num_outputs, > + &shader->info.nr_param_exports); > } > > static void si_count_scratch_private_memory(struct si_shader_context *ctx) > @@ -7521,7 +7383,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, > si_init_shader_ctx(&ctx, sscreen, shader, tm); > ctx.separate_prolog = !is_monolithic; > > - memset(shader->info.vs_output_param_offset, EXP_PARAM_UNDEFINED, > + memset(shader->info.vs_output_param_offset, AC_EXP_PARAM_UNDEFINED, > sizeof(shader->info.vs_output_param_offset)); > > shader->info.uses_instanceid = sel->info.uses_instanceid; > diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h > index cfa691b..2dfb567 100644 > --- a/src/gallium/drivers/radeonsi/si_shader.h > +++ b/src/gallium/drivers/radeonsi/si_shader.h > @@ -486,18 +486,6 @@ struct si_shader_config { > unsigned rsrc2; > }; > > -enum { > - /* SPI_PS_INPUT_CNTL_i.OFFSET[0:4] */ > - EXP_PARAM_OFFSET_0 = 0, > - EXP_PARAM_OFFSET_31 = 31, > - /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL[0:1] */ > - EXP_PARAM_DEFAULT_VAL_0000 = 64, > - EXP_PARAM_DEFAULT_VAL_0001, > - EXP_PARAM_DEFAULT_VAL_1110, > - EXP_PARAM_DEFAULT_VAL_1111, > - EXP_PARAM_UNDEFINED = 255, > -}; > - > /* GCN-specific shader info. 
*/ > struct si_shader_info { > ubyte vs_output_param_offset[SI_MAX_VS_OUTPUTS]; > diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c > index 21185c3..baf1eae 100644 > --- a/src/gallium/drivers/radeonsi/si_state_shaders.c > +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c > @@ -39,6 +39,7 @@ > > #include "util/disk_cache.h" > #include "util/mesa-sha1.h" > +#include "ac_exp_param.h" > > /* SHADER_CACHE */ > > @@ -1506,7 +1507,7 @@ void si_init_shader_selector_async(void *job, int thread_index) > for (i = 0; i < sel->info.num_outputs; i++) { > unsigned offset = shader->info.vs_output_param_offset[i]; > > - if (offset <= EXP_PARAM_OFFSET_31) > + if (offset <= AC_EXP_PARAM_OFFSET_31) > continue; > > unsigned name = sel->info.output_semantic_name[i]; > @@ -2001,18 +2002,18 @@ static unsigned si_get_ps_input_cntl(struct si_context *sctx, > index == vsinfo->output_semantic_index[j]) { > offset = vs->info.vs_output_param_offset[j]; > > - if (offset <= EXP_PARAM_OFFSET_31) { > + if (offset <= AC_EXP_PARAM_OFFSET_31) { > /* The input is loaded from parameter memory. */ > ps_input_cntl |= S_028644_OFFSET(offset); > } else if (!G_028644_PT_SPRITE_TEX(ps_input_cntl)) { > - if (offset == EXP_PARAM_UNDEFINED) { > + if (offset == AC_EXP_PARAM_UNDEFINED) { > /* This can happen with depth-only rendering. */ > offset = 0; > } else { > /* The input is a DEFAULT_VAL constant. */ > - assert(offset >= EXP_PARAM_DEFAULT_VAL_0000 && > - offset <= EXP_PARAM_DEFAULT_VAL_1111); > - offset -= EXP_PARAM_DEFAULT_VAL_0000; > + assert(offset >= AC_EXP_PARAM_DEFAULT_VAL_0000 && > + offset <= AC_EXP_PARAM_DEFAULT_VAL_1111); > + offset -= AC_EXP_PARAM_DEFAULT_VAL_0000; > } > > ps_input_cntl = S_028644_OFFSET(0x20) | _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev