Replying privately. See also http://bugzilla.eng.vmware.com/show_bug.cgi?id=999655#c5
Jose ----- Original Message ----- > Hmm, sure it is rarely used (for arb_vp and d3d9 vs 1.1 (2.0 too maybe, > though the semantics are different there even if the precision required > is the same))? > The problem I have with this is that the emulation which will get used > instead is _extremely_ terrible. EXP should be a cheaper alternative to > EX2, yet the emulation will make it more than twice as expensive > (because there are _two_ ex2 calls in exp_emit()). > Also, since the exp/log functions actually have configurable precision > (though it is compile-time dependent for now) maybe we could exploit that > and use a polynomial with a lesser degree? > Otherwise, though, having less specialized code makes sense. > > Roland > > > > Am 11.09.2013 13:04, schrieb jfons...@vmware.com: > > From: José Fonseca <jfons...@vmware.com> > > > > It was wrong for EXP.y, as we clamped the source before computing the > > fractional part, and this opcode should be rarely used, so it's not > > worth the hassle. > > --- > > src/gallium/auxiliary/gallivm/lp_bld_arit.c | 80 > > ++++++++-------------- > > src/gallium/auxiliary/gallivm/lp_bld_arit.h | 7 -- > > src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c | 15 ---- > > 3 files changed, 30 insertions(+), 72 deletions(-) > > > > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c > > b/src/gallium/auxiliary/gallivm/lp_bld_arit.c > > index 09107ff..00052ed 100644 > > --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c > > +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c > > @@ -3001,12 +3001,9 @@ const double lp_build_exp2_polynomial[] = { > > }; > > > > > > -void > > -lp_build_exp2_approx(struct lp_build_context *bld, > > - LLVMValueRef x, > > - LLVMValueRef *p_exp2_int_part, > > - LLVMValueRef *p_frac_part, > > - LLVMValueRef *p_exp2) > > +LLVMValueRef > > +lp_build_exp2(struct lp_build_context *bld, > > + LLVMValueRef x) > > { > > LLVMBuilderRef builder = bld->gallivm->builder; > > const struct lp_type type = bld->type; > > @@ -3019,65 
+3016,48 @@ lp_build_exp2_approx(struct lp_build_context *bld, > > > > assert(lp_check_value(bld->type, x)); > > > > - if(p_exp2_int_part || p_frac_part || p_exp2) { > > - /* TODO: optimize the constant case */ > > - if (gallivm_debug & GALLIVM_DEBUG_PERF && > > - LLVMIsConstant(x)) { > > - debug_printf("%s: inefficient/imprecise constant arithmetic\n", > > - __FUNCTION__); > > - } > > > > - assert(type.floating && type.width == 32); > > + /* TODO: optimize the constant case */ > > + if (gallivm_debug & GALLIVM_DEBUG_PERF && > > + LLVMIsConstant(x)) { > > + debug_printf("%s: inefficient/imprecise constant arithmetic\n", > > + __FUNCTION__); > > + } > > > > - /* We want to preserve NaN and make sure than for exp2 if x > 128, > > - * the result is INF and if it's smaller than -126.9 the result is > > 0 */ > > - x = lp_build_min_ext(bld, lp_build_const_vec(bld->gallivm, type, > > 128.0), x, > > - GALLIVM_NAN_RETURN_SECOND); > > - x = lp_build_max_ext(bld, lp_build_const_vec(bld->gallivm, type, > > -126.99999), x, > > - GALLIVM_NAN_RETURN_SECOND); > > + assert(type.floating && type.width == 32); > > > > - /* ipart = floor(x) */ > > - /* fpart = x - ipart */ > > - lp_build_ifloor_fract(bld, x, &ipart, &fpart); > > - } > > + /* We want to preserve NaN and make sure than for exp2 if x > 128, > > + * the result is INF and if it's smaller than -126.9 the result is 0 > > */ > > + x = lp_build_min_ext(bld, lp_build_const_vec(bld->gallivm, type, > > 128.0), x, > > + GALLIVM_NAN_RETURN_SECOND); > > + x = lp_build_max_ext(bld, lp_build_const_vec(bld->gallivm, type, > > -126.99999), x, > > + GALLIVM_NAN_RETURN_SECOND); > > > > - if(p_exp2_int_part || p_exp2) { > > - /* expipart = (float) (1 << ipart) */ > > - expipart = LLVMBuildAdd(builder, ipart, > > - lp_build_const_int_vec(bld->gallivm, type, > > 127), ""); > > - expipart = LLVMBuildShl(builder, expipart, > > - lp_build_const_int_vec(bld->gallivm, type, > > 23), ""); > > - expipart = LLVMBuildBitCast(builder, expipart, 
vec_type, ""); > > - } > > + /* ipart = floor(x) */ > > + /* fpart = x - ipart */ > > + lp_build_ifloor_fract(bld, x, &ipart, &fpart); > > > > - if(p_exp2) { > > - expfpart = lp_build_polynomial(bld, fpart, lp_build_exp2_polynomial, > > - Elements(lp_build_exp2_polynomial)); > > > > - res = LLVMBuildFMul(builder, expipart, expfpart, ""); > > - } > > > > - if(p_exp2_int_part) > > - *p_exp2_int_part = expipart; > > + /* expipart = (float) (1 << ipart) */ > > + expipart = LLVMBuildAdd(builder, ipart, > > + lp_build_const_int_vec(bld->gallivm, type, > > 127), ""); > > + expipart = LLVMBuildShl(builder, expipart, > > + lp_build_const_int_vec(bld->gallivm, type, 23), > > ""); > > + expipart = LLVMBuildBitCast(builder, expipart, vec_type, ""); > > > > - if(p_frac_part) > > - *p_frac_part = fpart; > > > > - if(p_exp2) > > - *p_exp2 = res; > > -} > > + expfpart = lp_build_polynomial(bld, fpart, lp_build_exp2_polynomial, > > + Elements(lp_build_exp2_polynomial)); > > + > > + res = LLVMBuildFMul(builder, expipart, expfpart, ""); > > > > > > -LLVMValueRef > > -lp_build_exp2(struct lp_build_context *bld, > > - LLVMValueRef x) > > -{ > > - LLVMValueRef res; > > - lp_build_exp2_approx(bld, x, NULL, NULL, &res); > > return res; > > } > > > > > > + > > /** > > * Extract the exponent of a IEEE-754 floating point value. 
> > * > > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h > > b/src/gallium/auxiliary/gallivm/lp_bld_arit.h > > index d98025e..49d4e2c 100644 > > --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h > > +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h > > @@ -326,13 +326,6 @@ lp_build_ilog2(struct lp_build_context *bld, > > LLVMValueRef x); > > > > void > > -lp_build_exp2_approx(struct lp_build_context *bld, > > - LLVMValueRef x, > > - LLVMValueRef *p_exp2_int_part, > > - LLVMValueRef *p_frac_part, > > - LLVMValueRef *p_exp2); > > - > > -void > > lp_build_log2_approx(struct lp_build_context *bld, > > LLVMValueRef x, > > LLVMValueRef *p_exp, > > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c > > b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c > > index 86c3249..1cfaf78 100644 > > --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c > > +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c > > @@ -1057,20 +1057,6 @@ ex2_emit_cpu( > > > > emit_data->args[0]); > > } > > > > -/* TGSI_OPCODE_EXP (CPU Only) */ > > -static void > > -exp_emit_cpu( > > - const struct lp_build_tgsi_action * action, > > - struct lp_build_tgsi_context * bld_base, > > - struct lp_build_emit_data * emit_data) > > -{ > > - lp_build_exp2_approx(&bld_base->base, emit_data->args[0], > > - &emit_data->output[TGSI_CHAN_X], > > - &emit_data->output[TGSI_CHAN_Y], > > - &emit_data->output[TGSI_CHAN_Z]); > > - emit_data->output[TGSI_CHAN_W] = bld_base->base.one; > > -} > > - > > /* TGSI_OPCODE_F2I (CPU Only) */ > > static void > > f2i_emit_cpu( > > @@ -1785,7 +1771,6 @@ lp_set_default_actions_cpu( > > bld_base->op_actions[TGSI_OPCODE_CMP].emit = cmp_emit_cpu; > > bld_base->op_actions[TGSI_OPCODE_DIV].emit = div_emit_cpu; > > bld_base->op_actions[TGSI_OPCODE_EX2].emit = ex2_emit_cpu; > > - bld_base->op_actions[TGSI_OPCODE_EXP].emit = exp_emit_cpu; > > bld_base->op_actions[TGSI_OPCODE_F2I].emit = f2i_emit_cpu; > > bld_base->op_actions[TGSI_OPCODE_F2U].emit = f2u_emit_cpu; 
> > bld_base->op_actions[TGSI_OPCODE_FLR].emit = flr_emit_cpu; > > > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev