On Mon, Sep 17, 2012 at 1:08 AM, Aurelien Jarno <aurel...@aurel32.net> wrote: > Now that the setcond TCG op is available, it's possible to replace > shl and shr helpers by TCG code. The code generated by TCG is slightly > longer than the code generated by GCC for the helper but is still worth > it as this avoid all the consequences of using an helper: globals saved > back to memory, no possible optimization, call overhead, etc. > > Cc: Peter Maydell <peter.mayd...@linaro.org> > Signed-off-by: Aurelien Jarno <aurel...@aurel32.net> > --- > target-arm/helper.h | 2 -- > target-arm/op_helper.c | 16 ---------------- > target-arm/translate.c | 32 ++++++++++++++++++++++++++++---- > 3 files changed, 28 insertions(+), 22 deletions(-) > > diff --git a/target-arm/helper.h b/target-arm/helper.h > index 7151e28..b123d3e 100644 > --- a/target-arm/helper.h > +++ b/target-arm/helper.h > @@ -145,8 +145,6 @@ DEF_HELPER_5(neon_tbl, i32, env, i32, i32, i32, i32) > DEF_HELPER_3(adc_cc, i32, env, i32, i32) > DEF_HELPER_3(sbc_cc, i32, env, i32, i32) > > -DEF_HELPER_3(shl, i32, env, i32, i32) > -DEF_HELPER_3(shr, i32, env, i32, i32) > DEF_HELPER_3(sar, i32, env, i32, i32) > DEF_HELPER_3(shl_cc, i32, env, i32, i32) > DEF_HELPER_3(shr_cc, i32, env, i32, i32) > diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c > index 6095f24..5fcd12c 100644 > --- a/target-arm/op_helper.c > +++ b/target-arm/op_helper.c > @@ -355,22 +355,6 @@ uint32_t HELPER(sbc_cc)(CPUARMState *env, uint32_t a, > uint32_t b) > > /* Similarly for variable shift instructions. */ > > -uint32_t HELPER(shl)(CPUARMState *env, uint32_t x, uint32_t i) > -{ > - int shift = i & 0xff; > - if (shift >= 32) > - return 0; > - return x << shift; > -} > - > -uint32_t HELPER(shr)(CPUARMState *env, uint32_t x, uint32_t i) > -{ > - int shift = i & 0xff; > - if (shift >= 32) > - return 0; > - return (uint32_t)x >> shift; > -} > - > uint32_t HELPER(sar)(CPUARMState *env, uint32_t x, uint32_t i) > { > int shift = i & 0xff; > diff --git a/target-arm/translate.c b/target-arm/translate.c > index 19bb1e8..9c29065 100644 > --- a/target-arm/translate.c > +++ b/target-arm/translate.c > @@ -440,6 +440,26 @@ static void gen_sub_CC(TCGv dest, TCGv t0, TCGv t1) > tcg_gen_mov_i32(dest, cpu_NF); > } > > +#define GEN_SHIFT(name) \ > +static void gen_##name(TCGv dest, TCGv t0, TCGv t1) \ > +{ \ > + TCGv tmp1, tmp2; \ > + tmp1 = tcg_temp_new_i32(); \ > + tcg_gen_andi_i32(tmp1, t1, 0xff); \ > + tmp2 = tcg_temp_new_i32(); \ > + tcg_gen_setcondi_i32(TCG_COND_GE, tmp2, tmp1, 32); \ > + tcg_gen_andi_i32(tmp1, tmp1, 0x1f); \
I don't think the 'and 0x1f' is needed given that later you'll and with 0 if the shift amount is >= 32. > + tcg_gen_##name##_i32(dest, t0, tmp1); \ > + tcg_temp_free_i32(tmp1); \ > + tcg_gen_subi_i32(tmp2, tmp2, 1); \ > + tcg_gen_and_i32(dest, dest, tmp2); \ > + tcg_temp_free_i32(tmp2); \ > +} Laurent > +GEN_SHIFT(shl) > +GEN_SHIFT(shr) > +#undef GEN_SHIFT > + > + > /* FIXME: Implement this natively. */ > #define tcg_gen_abs_i32(t0, t1) gen_helper_abs(t0, t1) > > @@ -516,8 +536,12 @@ static inline void gen_arm_shift_reg(TCGv var, int > shiftop, > } > } else { > switch (shiftop) { > - case 0: gen_helper_shl(var, cpu_env, var, shift); break; > - case 1: gen_helper_shr(var, cpu_env, var, shift); break; > + case 0: > + gen_shl(var, var, shift); > + break; > + case 1: > + gen_shr(var, var, shift); > + break; > case 2: gen_helper_sar(var, cpu_env, var, shift); break; > case 3: tcg_gen_andi_i32(shift, shift, 0x1f); > tcg_gen_rotr_i32(var, var, shift); break; > @@ -9161,7 +9185,7 @@ static void disas_thumb_insn(CPUARMState *env, > DisasContext *s) > break; > case 0x2: /* lsl */ > if (s->condexec_mask) { > - gen_helper_shl(tmp2, cpu_env, tmp2, tmp); > + gen_shl(tmp2, tmp2, tmp); > } else { > gen_helper_shl_cc(tmp2, cpu_env, tmp2, tmp); > gen_logic_CC(tmp2); > @@ -9169,7 +9193,7 @@ static void disas_thumb_insn(CPUARMState *env, > DisasContext *s) > break; > case 0x3: /* lsr */ > if (s->condexec_mask) { > - gen_helper_shr(tmp2, cpu_env, tmp2, tmp); > + gen_shr(tmp2, tmp2, tmp); > } else { > gen_helper_shr_cc(tmp2, cpu_env, tmp2, tmp); > gen_logic_CC(tmp2); > -- > 1.7.10.4 > >