On 12/09/2016 08:07 AM, Alex Bennée wrote: > > Richard Henderson <r...@twiddle.net> writes: > >> Particularly when andc is also available, this is two insns >> shorter than using clz to compute ctz. >> >> Signed-off-by: Richard Henderson <r...@twiddle.net> >> --- >> tcg/tcg-op.c | 107 >> ++++++++++++++++++++++++++++++++++++----------------------- >> 1 file changed, 65 insertions(+), 42 deletions(-) >> >> diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c >> index 6f4b1b6..d1debde 100644 >> --- a/tcg/tcg-op.c >> +++ b/tcg/tcg-op.c >> @@ -497,43 +497,46 @@ void tcg_gen_ctz_i32(TCGv_i32 ret, TCGv_i32 arg1, >> TCGv_i32 arg2) > <snip> >> } else { >> - gen_helper_ctz_i32(ret, arg1, arg2); >> + TCGv_i32 z, t; >> + if (TCG_TARGET_HAS_ctpop_i32 && TCG_TARGET_HAS_andc_i32) { >> + t = tcg_temp_new_i32(); >> + tcg_gen_subi_i32(t, arg1, 1); >> + tcg_gen_andc_i32(t, t, arg1); >> + tcg_gen_ctpop_i32(t, t); >> + do_movc: > > Hmmm and... > > <snip> >> void tcg_gen_clrsb_i32(TCGv_i32 ret, TCGv_i32 arg) >> @@ -1842,18 +1845,29 @@ void tcg_gen_ctz_i64(TCGv_i64 ret, TCGv_i64 arg1, >> TCGv_i64 arg2) >> { >> if (TCG_TARGET_HAS_ctz_i64) { >> tcg_gen_op3_i64(INDEX_op_ctz_i64, ret, arg1, arg2); > <snip> >> } else { >> - gen_helper_ctz_i64(ret, arg1, arg2); >> + TCGv_i64 z, t; >> + if (TCG_TARGET_HAS_ctpop_i64 && TCG_TARGET_HAS_andc_i64) { >> + t = tcg_temp_new_i64(); >> + tcg_gen_subi_i64(t, arg1, 1); >> + tcg_gen_andc_i64(t, t, arg1); >> + tcg_gen_ctpop_i64(t, t); >> + do_movc: > > Hmmm. > > So I'm not a goto hater as it makes sense for a bunch of things. But > this seems just a little too liberal usage to my eyes. 
What's wrong with > a little extra nesting (seeing as the compiler sorts it all out in the > end): > > if ((TCG_TARGET_HAS_ctpop_i32 && TCG_TARGET_HAS_andc_i32) > || TCG_TARGET_HAS_clz_i32 || TCG_TARGET_HAS_clz_i64) { > > TCGv_i32 z, t; > > if (TCG_TARGET_HAS_ctpop_i32 && TCG_TARGET_HAS_andc_i32) { > t = tcg_temp_new_i32(); > tcg_gen_subi_i32(t, arg1, 1); > tcg_gen_andc_i32(t, t, arg1); > tcg_gen_ctpop_i32(t, t); > } else if (TCG_TARGET_HAS_clz_i32 || TCG_TARGET_HAS_clz_i64) { > /* Since all non-x86 hosts have clz(0) == 32, don't fight it. > */ > t = tcg_temp_new_i32(); > tcg_gen_neg_i32(t, arg1); > tcg_gen_and_i32(t, t, arg1); > tcg_gen_clzi_i32(t, t, 32); > tcg_gen_xori_i32(t, t, 31); > } > /* final movc */ > z = tcg_const_i32(0); > tcg_gen_movcond_i32(TCG_COND_EQ, ret, arg1, z, arg2, t); > tcg_temp_free_i32(t); > tcg_temp_free_i32(z); > } else { > gen_helper_ctz_i32(ret, arg1, arg2); > }
That does look better. Thanks, r~