Make the fall-through path the TLB hit and the taken branch the TLB miss. Doing this both improves branch prediction and will allow further cleanup.
Signed-off-by: Richard Henderson <r...@twiddle.net> --- tcg/i386/tcg-target.c | 172 +++++++++++++++++++++++-------------------------- 1 files changed, 80 insertions(+), 92 deletions(-) diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c index 30c933c..0d85ec0 100644 --- a/tcg/i386/tcg-target.c +++ b/tcg/i386/tcg-target.c @@ -771,26 +771,21 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi, static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) { - int addr_reg, data_reg, data_reg2, r0, r1, mem_index, s_bits; + int addr_reg, addr_reg2 = 0; + int data_reg, data_reg2 = 0; + int r0, r1, mem_index, s_bits; #if defined(CONFIG_SOFTMMU) - uint8_t *label1_ptr, *label2_ptr; -#endif -#if TARGET_LONG_BITS == 64 -#if defined(CONFIG_SOFTMMU) - uint8_t *label3_ptr; -#endif - int addr_reg2; + uint8_t *label_ptr[3]; #endif data_reg = *args++; - if (opc == 3) + if (opc == 3) { data_reg2 = *args++; - else - data_reg2 = 0; + } addr_reg = *args++; -#if TARGET_LONG_BITS == 64 - addr_reg2 = *args++; -#endif + if (TARGET_LONG_BITS == 64) { + addr_reg2 = *args++; + } mem_index = *args; s_bits = opc & 3; @@ -815,28 +810,42 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, tcg_out_mov(s, r0, addr_reg); -#if TARGET_LONG_BITS == 32 - /* je label1 */ - tcg_out8(s, OPC_JCC_short + JCC_JE); - label1_ptr = s->code_ptr; - s->code_ptr++; -#else - /* jne label3 */ + /* jne label1 */ tcg_out8(s, OPC_JCC_short + JCC_JNE); - label3_ptr = s->code_ptr; + label_ptr[0] = s->code_ptr; s->code_ptr++; - /* cmp 4(r1), addr_reg2 */ - tcg_out_modrm_offset(s, OPC_CMP_GvEv, addr_reg2, r1, 4); + if (TARGET_LONG_BITS == 64) { + /* cmp 4(r1), addr_reg2 */ + tcg_out_modrm_offset(s, OPC_CMP_GvEv, addr_reg2, r1, 4); + + /* jne label1 */ + tcg_out8(s, OPC_JCC_short + JCC_JNE); + label_ptr[1] = s->code_ptr; + s->code_ptr++; + } + + /* TLB Hit. 
*/ + + /* add x(r1), r0 */ + tcg_out_modrm_offset(s, OPC_ADD_GvEv, r0, r1, + offsetof(CPUTLBEntry, addend) - + offsetof(CPUTLBEntry, addr_read)); + + tcg_out_qemu_ld_direct(s, data_reg, data_reg2, r0, 0, opc); - /* je label1 */ - tcg_out8(s, OPC_JCC_short + JCC_JE); - label1_ptr = s->code_ptr; + /* jmp label2 */ + tcg_out8(s, OPC_JMP_short); + label_ptr[2] = s->code_ptr; s->code_ptr++; - /* label3: */ - *label3_ptr = s->code_ptr - label3_ptr - 1; -#endif + /* TLB Miss. */ + + /* label1: */ + *label_ptr[0] = s->code_ptr - label_ptr[0] - 1; + if (TARGET_LONG_BITS == 64) { + *label_ptr[1] = s->code_ptr - label_ptr[1] - 1; + } /* XXX: move that code at the end of the TB */ #if TARGET_LONG_BITS == 32 @@ -876,23 +885,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, break; } - /* jmp label2 */ - tcg_out8(s, OPC_JMP_short); - label2_ptr = s->code_ptr; - s->code_ptr++; - - /* label1: */ - *label1_ptr = s->code_ptr - label1_ptr - 1; - - /* add x(r1), r0 */ - tcg_out_modrm_offset(s, OPC_ADD_GvEv, r0, r1, - offsetof(CPUTLBEntry, addend) - - offsetof(CPUTLBEntry, addr_read)); - - tcg_out_qemu_ld_direct(s, data_reg, data_reg2, r0, 0, opc); - /* label2: */ - *label2_ptr = s->code_ptr - label2_ptr - 1; + *label_ptr[2] = s->code_ptr - label_ptr[2] - 1; #else tcg_out_qemu_ld_direct(s, data_reg, data_reg2, addr_reg, GUEST_BASE, opc); #endif @@ -955,27 +949,22 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi, static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) { - int addr_reg, data_reg, data_reg2, r0, r1, mem_index, s_bits; + int addr_reg, addr_reg2 = 0; + int data_reg, data_reg2 = 0; + int r0, r1, mem_index, s_bits; #if defined(CONFIG_SOFTMMU) int stack_adjust; - uint8_t *label1_ptr, *label2_ptr; -#endif -#if TARGET_LONG_BITS == 64 -#if defined(CONFIG_SOFTMMU) - uint8_t *label3_ptr; -#endif - int addr_reg2; + uint8_t *label_ptr[3]; #endif data_reg = *args++; - if (opc == 3) + if (opc == 3) { data_reg2 = *args++; - 
else - data_reg2 = 0; + } addr_reg = *args++; -#if TARGET_LONG_BITS == 64 - addr_reg2 = *args++; -#endif + if (TARGET_LONG_BITS == 64) { + addr_reg2 = *args++; + } mem_index = *args; s_bits = opc; @@ -1001,28 +990,42 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, tcg_out_mov(s, r0, addr_reg); -#if TARGET_LONG_BITS == 32 - /* je label1 */ - tcg_out8(s, OPC_JCC_short + JCC_JE); - label1_ptr = s->code_ptr; - s->code_ptr++; -#else - /* jne label3 */ + /* jne label1 */ tcg_out8(s, OPC_JCC_short + JCC_JNE); - label3_ptr = s->code_ptr; + label_ptr[0] = s->code_ptr; s->code_ptr++; - /* cmp 4(r1), addr_reg2 */ - tcg_out_modrm_offset(s, OPC_CMP_GvEv, addr_reg2, r1, 4); + if (TARGET_LONG_BITS == 64) { + /* cmp 4(r1), addr_reg2 */ + tcg_out_modrm_offset(s, OPC_CMP_GvEv, addr_reg2, r1, 4); + + /* jne label1 */ + tcg_out8(s, OPC_JCC_short + JCC_JNE); + label_ptr[1] = s->code_ptr; + s->code_ptr++; + } + + /* TLB Hit. */ + + /* add x(r1), r0 */ + tcg_out_modrm_offset(s, OPC_ADD_GvEv, r0, r1, + offsetof(CPUTLBEntry, addend) - + offsetof(CPUTLBEntry, addr_write)); + + tcg_out_qemu_st_direct(s, data_reg, data_reg2, r0, 0, opc); - /* je label1 */ - tcg_out8(s, OPC_JCC_short + JCC_JE); - label1_ptr = s->code_ptr; + /* jmp label2 */ + tcg_out8(s, OPC_JMP_short); + label_ptr[2] = s->code_ptr; s->code_ptr++; - /* label3: */ - *label3_ptr = s->code_ptr - label3_ptr - 1; -#endif + /* TLB Miss. 
*/ + + /* label1: */ + *label_ptr[0] = s->code_ptr - label_ptr[0] - 1; + if (TARGET_LONG_BITS == 64) { + *label_ptr[1] = s->code_ptr - label_ptr[1] - 1; + } /* XXX: move that code at the end of the TB */ #if TARGET_LONG_BITS == 32 @@ -1080,23 +1083,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, tcg_out_addi(s, TCG_REG_ESP, stack_adjust); } - /* jmp label2 */ - tcg_out8(s, OPC_JMP_short); - label2_ptr = s->code_ptr; - s->code_ptr++; - - /* label1: */ - *label1_ptr = s->code_ptr - label1_ptr - 1; - - /* add x(r1), r0 */ - tcg_out_modrm_offset(s, OPC_ADD_GvEv, r0, r1, - offsetof(CPUTLBEntry, addend) - - offsetof(CPUTLBEntry, addr_write)); - - tcg_out_qemu_st_direct(s, data_reg, data_reg2, r0, 0, opc); - /* label2: */ - *label2_ptr = s->code_ptr - label2_ptr - 1; + *label_ptr[2] = s->code_ptr - label_ptr[2] - 1; #else tcg_out_qemu_st_direct(s, data_reg, data_reg2, addr_reg, GUEST_BASE, opc); #endif -- 1.7.0.1