Richard Henderson <r...@twiddle.net> writes: > From: "Emilio G. Cota" <c...@braap.org> > > Speed up indirect branches by jumping to the target if it is valid. > > Softmmu measurements (see later commit for user-mode numbers): > > Note: baseline (i.e. speedup == 1x) is QEMU v2.9.0. > > - SPECint06 (test set), x86_64-softmmu (Ubuntu 16.04 guest). > Host: Intel i7-4790K @ 4.00GHz > > 2.4x > +-+--------------------------------------------------------------------------------------------------------------+-+ > | > | > | cross > | > 2.2x > +cross+jr..........................................................................+++...........................+-+ > | > | | > | > +++ | | > 2x > +-+..............................................................................|..|............................+-+ > | > | | | > | > | | | > 1.8x > +-+..............................................................................|####...........................+-+ > | > |# |# | > | > **** |# | > 1.6x > +-+............................................................................*.|*.|#...........................+-+ > | > * |* |# | > | > * |* |# | > 1.4x > +-+.......................................................................+++..*.|*.|#...........................+-+ > | ++++++ > #### * |*++# +++ | > | +++ | | > #++# *++* # +++ | | > 1.2x > +-+......................###.....####....+++............|..|...........****..#.*..*..#....####...|.###.....####..+-+ > | +++ **** # **** # #### ***### > *++* # * * # #++# ****|# +++#++# | > | ****### +++ *++* # *++* # ++# # #### *|* |# +++ > * * # * * # *** # *| *|# **** # | > 1x > +-++-*++*++#++***###++*++*+#++*+-*++#+****++#++***++#+-*+*++#-+****##++*++*-+#+*++*-+#++*+*++#++*-+*+#++*++*++#-++-+ > | * * # * * # * * # * * # * * # * * # *|* |# *++* # > * * # * * # * * # * * # * * # | > | * * # * * # * * # * * # * * # * * # *+*++# * * # > * * # * * # * * # * * # * * # | > 0.8x > 
+-+--****###--***###--****##--****###-****###--***###--***###--****##--****###-****###--***###--****##--****###--+-+ > astar bzip2 gcc gobmk h264ref hmmer libquantum mcf > omnetpp perlbench sjeng xalancbmk hmean > png: http://imgur.com/DU36YFU > > NB. 'cross' represents the previous commit. > > Reviewed-by: Richard Henderson <r...@twiddle.net> > Signed-off-by: Emilio G. Cota <c...@braap.org> > Message-Id: <1493263764-18657-11-git-send-email-c...@braap.org> > Signed-off-by: Richard Henderson <r...@twiddle.net>
Reviewed-by: Alex Bennée <alex.ben...@linaro.org> > --- > target/i386/translate.c | 14 ++++++++------ > 1 file changed, 8 insertions(+), 6 deletions(-) > > diff --git a/target/i386/translate.c b/target/i386/translate.c > index ea113fe..674ec96 100644 > --- a/target/i386/translate.c > +++ b/target/i386/translate.c > @@ -4996,7 +4996,7 @@ static target_ulong disas_insn(CPUX86State *env, > DisasContext *s, > gen_push_v(s, cpu_T1); > gen_op_jmp_v(cpu_T0); > gen_bnd_jmp(s); > - gen_eob(s); > + gen_jr(s, cpu_T0); > break; > case 3: /* lcall Ev */ > gen_op_ld_v(s, ot, cpu_T1, cpu_A0); > @@ -5014,7 +5014,8 @@ static target_ulong disas_insn(CPUX86State *env, > DisasContext *s, > tcg_const_i32(dflag - 1), > tcg_const_i32(s->pc - s->cs_base)); > } > - gen_eob(s); > + tcg_gen_ld_tl(cpu_tmp4, cpu_env, offsetof(CPUX86State, eip)); > + gen_jr(s, cpu_tmp4); > break; > case 4: /* jmp Ev */ > if (dflag == MO_16) { > @@ -5022,7 +5023,7 @@ static target_ulong disas_insn(CPUX86State *env, > DisasContext *s, > } > gen_op_jmp_v(cpu_T0); > gen_bnd_jmp(s); > - gen_eob(s); > + gen_jr(s, cpu_T0); > break; > case 5: /* ljmp Ev */ > gen_op_ld_v(s, ot, cpu_T1, cpu_A0); > @@ -5037,7 +5038,8 @@ static target_ulong disas_insn(CPUX86State *env, > DisasContext *s, > gen_op_movl_seg_T0_vm(R_CS); > gen_op_jmp_v(cpu_T1); > } > - gen_eob(s); > + tcg_gen_ld_tl(cpu_tmp4, cpu_env, offsetof(CPUX86State, eip)); > + gen_jr(s, cpu_tmp4); > break; > case 6: /* push Ev */ > gen_push_v(s, cpu_T0); > @@ -6417,7 +6419,7 @@ static target_ulong disas_insn(CPUX86State *env, > DisasContext *s, > /* Note that gen_pop_T0 uses a zero-extending load. */ > gen_op_jmp_v(cpu_T0); > gen_bnd_jmp(s); > - gen_eob(s); > + gen_jr(s, cpu_T0); > break; > case 0xc3: /* ret */ > ot = gen_pop_T0(s); > @@ -6425,7 +6427,7 @@ static target_ulong disas_insn(CPUX86State *env, > DisasContext *s, > /* Note that gen_pop_T0 uses a zero-extending load. 
*/ > gen_op_jmp_v(cpu_T0); > gen_bnd_jmp(s); > - gen_eob(s); > + gen_jr(s, cpu_T0); > break; > case 0xca: /* lret im */ > val = cpu_ldsw_code(env, s->pc); -- Alex Bennée