On 4/24/19 12:29 AM, Richard Henderson wrote: > On 4/23/19 3:09 PM, Philippe Mathieu-Daudé wrote: >> On 4/23/19 8:37 PM, David Hildenbrand wrote: >>> On 20.04.19 09:34, Richard Henderson wrote: >>>> Remove a function of the same name from target/arm/. >>>> Use a branchless implementation of abs that gcc uses for x86. >>>> >>>> Signed-off-by: Richard Henderson <richard.hender...@linaro.org> >>>> --- >>>> tcg/tcg-op.h | 5 +++++ >>>> target/arm/translate.c | 10 ---------- >>>> tcg/tcg-op.c | 20 ++++++++++++++++++++ >>>> 3 files changed, 25 insertions(+), 10 deletions(-) >>>> >>>> diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h >>>> index 472b73cb38..660fe205d0 100644 >>>> --- a/tcg/tcg-op.h >>>> +++ b/tcg/tcg-op.h >>>> @@ -335,6 +335,7 @@ void tcg_gen_smin_i32(TCGv_i32, TCGv_i32 arg1, >>>> TCGv_i32 arg2); >>>> void tcg_gen_smax_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2); >>>> void tcg_gen_umin_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2); >>>> void tcg_gen_umax_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2); >>>> +void tcg_gen_abs_i32(TCGv_i32, TCGv_i32); >>>> >>>> static inline void tcg_gen_discard_i32(TCGv_i32 arg) >>>> { >>>> @@ -534,6 +535,7 @@ void tcg_gen_smin_i64(TCGv_i64, TCGv_i64 arg1, >>>> TCGv_i64 arg2); >>>> void tcg_gen_smax_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2); >>>> void tcg_gen_umin_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2); >>>> void tcg_gen_umax_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2); >>>> +void tcg_gen_abs_i64(TCGv_i64, TCGv_i64); >>>> >>>> #if TCG_TARGET_REG_BITS == 64 >>>> static inline void tcg_gen_discard_i64(TCGv_i64 arg) >>>> @@ -973,6 +975,7 @@ void tcg_gen_nor_vec(unsigned vece, TCGv_vec r, >>>> TCGv_vec a, TCGv_vec b); >>>> void tcg_gen_eqv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b); >>>> void tcg_gen_not_vec(unsigned vece, TCGv_vec r, TCGv_vec a); >>>> void tcg_gen_neg_vec(unsigned vece, TCGv_vec r, TCGv_vec a); >>>> +void tcg_gen_abs_vec(unsigned vece, TCGv_vec r, TCGv_vec a); >>>> void tcg_gen_ssadd_vec(unsigned vece, 
TCGv_vec r, TCGv_vec a, TCGv_vec b); >>>> void tcg_gen_usadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b); >>>> void tcg_gen_sssub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b); >>>> @@ -1019,6 +1022,7 @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, >>>> TCGArg offset, TCGType t); >>>> #define tcg_gen_addi_tl tcg_gen_addi_i64 >>>> #define tcg_gen_sub_tl tcg_gen_sub_i64 >>>> #define tcg_gen_neg_tl tcg_gen_neg_i64 >>>> +#define tcg_gen_abs_tl tcg_gen_abs_i64 >>>> #define tcg_gen_subfi_tl tcg_gen_subfi_i64 >>>> #define tcg_gen_subi_tl tcg_gen_subi_i64 >>>> #define tcg_gen_and_tl tcg_gen_and_i64 >>>> @@ -1131,6 +1135,7 @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, >>>> TCGArg offset, TCGType t); >>>> #define tcg_gen_addi_tl tcg_gen_addi_i32 >>>> #define tcg_gen_sub_tl tcg_gen_sub_i32 >>>> #define tcg_gen_neg_tl tcg_gen_neg_i32 >>>> +#define tcg_gen_abs_tl tcg_gen_abs_i32 >>>> #define tcg_gen_subfi_tl tcg_gen_subfi_i32 >>>> #define tcg_gen_subi_tl tcg_gen_subi_i32 >>>> #define tcg_gen_and_tl tcg_gen_and_i32 >>>> diff --git a/target/arm/translate.c b/target/arm/translate.c >>>> index 83a008e945..721171794d 100644 >>>> --- a/target/arm/translate.c >>>> +++ b/target/arm/translate.c >>>> @@ -603,16 +603,6 @@ static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, >>>> TCGv_i32 t1) >>>> tcg_temp_free_i32(tmp1); >>>> } >>>> >>>> -static void tcg_gen_abs_i32(TCGv_i32 dest, TCGv_i32 src) >>>> -{ >>>> - TCGv_i32 c0 = tcg_const_i32(0); >>>> - TCGv_i32 tmp = tcg_temp_new_i32(); >>>> - tcg_gen_neg_i32(tmp, src); >>>> - tcg_gen_movcond_i32(TCG_COND_GT, dest, src, c0, src, tmp); >>>> - tcg_temp_free_i32(c0); >>>> - tcg_temp_free_i32(tmp); >>>> -} >>>> - >>>> static void shifter_out_im(TCGv_i32 var, int shift) >>>> { >>>> if (shift == 0) { >>>> diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c >>>> index a00d1df37e..0ac291f1c4 100644 >>>> --- a/tcg/tcg-op.c >>>> +++ b/tcg/tcg-op.c >>>> @@ -1091,6 +1091,16 @@ void tcg_gen_umax_i32(TCGv_i32 ret, TCGv_i32 a, >>>> TCGv_i32 b) 
>>>> tcg_gen_movcond_i32(TCG_COND_LTU, ret, a, b, b, a); >>>> } >>>> >>>> +void tcg_gen_abs_i32(TCGv_i32 ret, TCGv_i32 a) >>>> +{ >>>> + TCGv_i32 t = tcg_temp_new_i32(); >>>> + >>>> + tcg_gen_sari_i32(t, a, 31); >>>> + tcg_gen_xor_i32(ret, a, t); >>>> + tcg_gen_sub_i32(ret, ret, t); >>>> + tcg_temp_free_i32(t); >>>> +} >>>> + >>>> /* 64-bit ops */ >>>> >>>> #if TCG_TARGET_REG_BITS == 32 >>>> @@ -2548,6 +2558,16 @@ void tcg_gen_umax_i64(TCGv_i64 ret, TCGv_i64 a, >>>> TCGv_i64 b) >>>> tcg_gen_movcond_i64(TCG_COND_LTU, ret, a, b, b, a); >>>> } >>>> >>>> +void tcg_gen_abs_i64(TCGv_i64 ret, TCGv_i64 a) >>>> +{ >>>> + TCGv_i64 t = tcg_temp_new_i64(); >>>> + >>>> + tcg_gen_sari_i64(t, a, 63); >>>> + tcg_gen_xor_i64(ret, a, t); >>>> + tcg_gen_sub_i64(ret, ret, t); >>>> + tcg_temp_free_i64(t); >>>> +} >>>> + >>>> /* Size changing operations. */ >>>> >>>> void tcg_gen_extrl_i64_i32(TCGv_i32 ret, TCGv_i64 arg) >>>> >>> >>> Nice trick >> >> Per commit 7dcfb0897b99, I think it's worth a: >> >> Inspired-by: Edgar E. Iglesias <edgar.igles...@gmail.com> > > *shrug* As per the comment, I got the sequence from gcc -O2 -S.
Now I better understand your comment "Use a branchless implementation of abs that gcc uses for x86". Previously I misunderstood it =) Going back to commit 7dcfb0897b99, possibly Edgar figured out the same trick from GCC as well. Regards, Phil.