On 09/05/2019 01:35, Anton Blanchard wrote:
> A few small optimisations:
>
> In VSX_LOAD_SCALAR_DS() we don't need to read the VSR via
> get_cpu_vsrh().
>
> Split VSX_VECTOR_LOAD_STORE() into two functions. Loads only need to
> write the VSRs (set_cpu_vsr*()) and stores only need to read the VSRs
> (get_cpu_vsr*()).
>
> Thanks to Mark Cave-Ayland for the suggestions.
>
> Signed-off-by: Anton Blanchard <an...@ozlabs.org>
> ---
>  target/ppc/translate/vsx-impl.inc.c | 68 ++++++++++++++++++++++++-----
>  1 file changed, 58 insertions(+), 10 deletions(-)
>
> diff --git a/target/ppc/translate/vsx-impl.inc.c b/target/ppc/translate/vsx-impl.inc.c
> index 4b7627f53b..cdb44b8b70 100644
> --- a/target/ppc/translate/vsx-impl.inc.c
> +++ b/target/ppc/translate/vsx-impl.inc.c
> @@ -228,7 +228,7 @@ static void gen_lxvb16x(DisasContext *ctx)
>      tcg_temp_free_i64(xtl);
>  }
>
> -#define VSX_VECTOR_LOAD_STORE(name, op, indexed) \
> +#define VSX_VECTOR_LOAD(name, op, indexed) \
>  static void gen_##name(DisasContext *ctx) \
>  { \
>      int xt; \
> @@ -255,8 +255,6 @@ static void gen_##name(DisasContext *ctx) \
>      } \
>      xth = tcg_temp_new_i64(); \
>      xtl = tcg_temp_new_i64(); \
> -    get_cpu_vsrh(xth, xt); \
> -    get_cpu_vsrl(xtl, xt); \
>      gen_set_access_type(ctx, ACCESS_INT); \
>      EA = tcg_temp_new(); \
>      if (indexed) { \
> @@ -282,10 +280,61 @@ static void gen_##name(DisasContext *ctx) \
>      tcg_temp_free_i64(xtl); \
>  }
>
> -VSX_VECTOR_LOAD_STORE(lxv, ld_i64, 0)
> -VSX_VECTOR_LOAD_STORE(stxv, st_i64, 0)
> -VSX_VECTOR_LOAD_STORE(lxvx, ld_i64, 1)
> -VSX_VECTOR_LOAD_STORE(stxvx, st_i64, 1)
> +VSX_VECTOR_LOAD(lxv, ld_i64, 0)
> +VSX_VECTOR_LOAD(lxvx, ld_i64, 1)
> +
> +#define VSX_VECTOR_STORE(name, op, indexed) \
> +static void gen_##name(DisasContext *ctx) \
> +{ \
> +    int xt; \
> +    TCGv EA; \
> +    TCGv_i64 xth; \
> +    TCGv_i64 xtl; \
> + \
> +    if (indexed) { \
> +        xt = xT(ctx->opcode); \
> +    } else { \
> +        xt = DQxT(ctx->opcode); \
> +    } \
> + \
> +    if (xt < 32) { \
> +        if (unlikely(!ctx->vsx_enabled)) { \
> +            gen_exception(ctx, POWERPC_EXCP_VSXU); \
> +            return; \
> +        } \
> +    } else { \
> +        if (unlikely(!ctx->altivec_enabled)) { \
> +            gen_exception(ctx, POWERPC_EXCP_VPU); \
> +            return; \
> +        } \
> +    } \
> +    xth = tcg_temp_new_i64(); \
> +    xtl = tcg_temp_new_i64(); \
> +    get_cpu_vsrh(xth, xt); \
> +    get_cpu_vsrl(xtl, xt); \
> +    gen_set_access_type(ctx, ACCESS_INT); \
> +    EA = tcg_temp_new(); \
> +    if (indexed) { \
> +        gen_addr_reg_index(ctx, EA); \
> +    } else { \
> +        gen_addr_imm_index(ctx, EA, 0x0F); \
> +    } \
> +    if (ctx->le_mode) { \
> +        tcg_gen_qemu_##op(xtl, EA, ctx->mem_idx, MO_LEQ); \
> +        tcg_gen_addi_tl(EA, EA, 8); \
> +        tcg_gen_qemu_##op(xth, EA, ctx->mem_idx, MO_LEQ); \
> +    } else { \
> +        tcg_gen_qemu_##op(xth, EA, ctx->mem_idx, MO_BEQ); \
> +        tcg_gen_addi_tl(EA, EA, 8); \
> +        tcg_gen_qemu_##op(xtl, EA, ctx->mem_idx, MO_BEQ); \
> +    } \
> +    tcg_temp_free(EA); \
> +    tcg_temp_free_i64(xth); \
> +    tcg_temp_free_i64(xtl); \
> +}
> +
> +VSX_VECTOR_STORE(stxv, st_i64, 0)
> +VSX_VECTOR_STORE(stxvx, st_i64, 1)
>
>  #ifdef TARGET_PPC64
>  #define VSX_VECTOR_LOAD_STORE_LENGTH(name) \
> @@ -330,7 +379,6 @@ static void gen_##name(DisasContext *ctx) \
>          return; \
>      } \
>      xth = tcg_temp_new_i64(); \
> -    get_cpu_vsrh(xth, rD(ctx->opcode) + 32); \
>      gen_set_access_type(ctx, ACCESS_INT); \
>      EA = tcg_temp_new(); \
>      gen_addr_imm_index(ctx, EA, 0x03); \
> @@ -514,8 +562,8 @@ static void gen_##name(DisasContext *ctx) \
>      tcg_temp_free_i64(xth); \
>  }
>
> -VSX_LOAD_SCALAR_DS(stxsd, st64_i64)
> -VSX_LOAD_SCALAR_DS(stxssp, st32fs)
> +VSX_STORE_SCALAR_DS(stxsd, st64_i64)
> +VSX_STORE_SCALAR_DS(stxssp, st32fs)
>
>  static void gen_mfvsrwz(DisasContext *ctx)
>  {
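For anyone reading along in the archive: the tail of the load macro isn't quoted
above, but there the values coming back from memory are written straight into the
VSRs with set_cpu_vsr*(), which is why the get_cpu_vsr*() reads dropped in the
first hunk were dead. Roughly, for the little-endian case (a from-memory sketch,
not copied from the patch, with the set_cpu_vsr*() argument order assumed):

    /* Load path, LE case: load each doubleword, then write it to the VSR. */
    tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_LEQ);
    set_cpu_vsrl(xt, xtl);                  /* low doubleword of VSR[xt]  */
    tcg_gen_addi_tl(EA, EA, 8);
    tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_LEQ);
    set_cpu_vsrh(xt, xth);                  /* high doubleword of VSR[xt] */

So splitting the macro lets the load variant skip the register reads entirely and
the store variant skip the register writes.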
Reviewed-by: Mark Cave-Ayland <mark.cave-ayl...@ilande.co.uk>

ATB,

Mark.