From: Nicolai Hähnle <nicolai.haeh...@amd.com> --- src/gallium/drivers/radeon/radeon_llvm.h | 4 +++ .../drivers/radeon/radeon_setup_tgsi_llvm.c | 29 ++++++++++++++++++++++ src/gallium/drivers/radeonsi/si_shader.c | 19 +------------- 3 files changed, 34 insertions(+), 18 deletions(-)
diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h index 0276ef3..da5b7f5 100644 --- a/src/gallium/drivers/radeon/radeon_llvm.h +++ b/src/gallium/drivers/radeon/radeon_llvm.h @@ -109,20 +109,24 @@ struct radeon_llvm_context { struct gallivm_state gallivm; }; LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base, enum tgsi_opcode_type type); LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base, enum tgsi_opcode_type type, LLVMValueRef value); +LLVMValueRef radeon_llvm_bound_index(struct radeon_llvm_context *ctx, + LLVMValueRef index, + unsigned num); + void radeon_llvm_emit_prepare_cube_coords(struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data, LLVMValueRef *coords_arg, LLVMValueRef *derivs_arg); void radeon_llvm_context_init(struct radeon_llvm_context *ctx, const char *triple, const struct tgsi_shader_info *info, const struct tgsi_token *tokens); diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c index dd7d60b..7cdf228 100644 --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c @@ -66,20 +66,49 @@ LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base, { LLVMBuilderRef builder = bld_base->base.gallivm->builder; LLVMTypeRef dst_type = tgsi2llvmtype(bld_base, type); if (dst_type) return LLVMBuildBitCast(builder, value, dst_type, ""); else return value; } +/** + * Return a value that is equal to the given i32 \p index if it lies in [0,num) + * or an undefined value in the same interval otherwise. + */ +LLVMValueRef radeon_llvm_bound_index(struct radeon_llvm_context *ctx, + LLVMValueRef index, + unsigned num) +{ + struct gallivm_state *gallivm = &ctx->gallivm; + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef c_max = lp_build_const_int32(gallivm, num - 1); + LLVMValueRef cc; + + if (util_is_power_of_two(num)) { + index = LLVMBuildAnd(builder, index, c_max, ""); + } else { + /* In theory, this MAX pattern should result in code that is + * as good as the bit-wise AND above. + * + * In practice, LLVM generates worse code (at the time of + * writing), because its value tracking is not strong enough. + */ + cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, ""); + index = LLVMBuildSelect(builder, cc, index, c_max, ""); + } + + return index; +} + static struct radeon_llvm_loop *get_current_loop(struct radeon_llvm_context *ctx) { return ctx->loop_depth > 0 ? ctx->loop + (ctx->loop_depth - 1) : NULL; } static struct radeon_llvm_branch *get_current_branch(struct radeon_llvm_context *ctx) { return ctx->branch_depth > 0 ? ctx->branch + (ctx->branch_depth - 1) : NULL; } diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 06b5c9c..a5b566e 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -558,47 +558,30 @@ static LLVMValueRef get_indirect_index(struct si_shader_context *ctx, } /** * Like get_indirect_index, but restricts the return value to a (possibly * undefined) value inside [0..num). */ static LLVMValueRef get_bounded_indirect_index(struct si_shader_context *ctx, const struct tgsi_ind_register *ind, int rel_index, unsigned num) { - struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm; - LLVMBuilderRef builder = gallivm->builder; LLVMValueRef result = get_indirect_index(ctx, ind, rel_index); - LLVMValueRef c_max = LLVMConstInt(ctx->i32, num - 1, 0); - LLVMValueRef cc; /* LLVM 3.8: If indirect resource indexing is used: * - SI & CIK hang * - VI crashes */ if (HAVE_LLVM <= 0x0308) return LLVMGetUndef(ctx->i32); - if (util_is_power_of_two(num)) { - result = LLVMBuildAnd(builder, result, c_max, ""); - } else { - /* In theory, this MAX pattern should result in code that is - * as good as the bit-wise AND above. - * - * In practice, LLVM generates worse code (at the time of - * writing), because its value tracking is not strong enough. - */ - cc = LLVMBuildICmp(builder, LLVMIntULE, result, c_max, ""); - result = LLVMBuildSelect(builder, cc, result, c_max, ""); - } - - return result; + return radeon_llvm_bound_index(&ctx->radeon_bld, result, num); } /** * Calculate a dword address given an input or output register and a stride. */ static LLVMValueRef get_dw_address(struct si_shader_context *ctx, const struct tgsi_full_dst_register *dst, const struct tgsi_full_src_register *src, LLVMValueRef vertex_dw_stride, -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev