Make the fall-through path the TLB hit and the taken branch the TLB miss.
Doing this both improves branch prediction and will allow further cleanup.

Signed-off-by: Richard Henderson <r...@twiddle.net>
---
 tcg/i386/tcg-target.c |  172 +++++++++++++++++++++++--------------------------
 1 files changed, 80 insertions(+), 92 deletions(-)

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 30c933c..0d85ec0 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -771,26 +771,21 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
 static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                             int opc)
 {
-    int addr_reg, data_reg, data_reg2, r0, r1, mem_index, s_bits;
+    int addr_reg, addr_reg2 = 0;
+    int data_reg, data_reg2 = 0;
+    int r0, r1, mem_index, s_bits;
 #if defined(CONFIG_SOFTMMU)
-    uint8_t *label1_ptr, *label2_ptr;
-#endif
-#if TARGET_LONG_BITS == 64
-#if defined(CONFIG_SOFTMMU)
-    uint8_t *label3_ptr;
-#endif
-    int addr_reg2;
+    uint8_t *label_ptr[3];
 #endif
 
     data_reg = *args++;
-    if (opc == 3)
+    if (opc == 3) {
         data_reg2 = *args++;
-    else
-        data_reg2 = 0;
+    }
     addr_reg = *args++;
-#if TARGET_LONG_BITS == 64
-    addr_reg2 = *args++;
-#endif
+    if (TARGET_LONG_BITS == 64) {
+        addr_reg2 = *args++;
+    }
     mem_index = *args;
     s_bits = opc & 3;
 
@@ -815,28 +810,42 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
 
     tcg_out_mov(s, r0, addr_reg);
 
-#if TARGET_LONG_BITS == 32
-    /* je label1 */
-    tcg_out8(s, OPC_JCC_short + JCC_JE);
-    label1_ptr = s->code_ptr;
-    s->code_ptr++;
-#else
-    /* jne label3 */
+    /* jne label1 */
     tcg_out8(s, OPC_JCC_short + JCC_JNE);
-    label3_ptr = s->code_ptr;
+    label_ptr[0] = s->code_ptr;
     s->code_ptr++;
 
-    /* cmp 4(r1), addr_reg2 */
-    tcg_out_modrm_offset(s, OPC_CMP_GvEv, addr_reg2, r1, 4);
+    if (TARGET_LONG_BITS == 64) {
+        /* cmp 4(r1), addr_reg2 */
+        tcg_out_modrm_offset(s, OPC_CMP_GvEv, addr_reg2, r1, 4);
+
+        /* jne label1 */
+        tcg_out8(s, OPC_JCC_short + JCC_JNE);
+        label_ptr[1] = s->code_ptr;
+        s->code_ptr++;
+    }
+
+    /* TLB Hit.  */
+
+    /* add x(r1), r0 */
+    tcg_out_modrm_offset(s, OPC_ADD_GvEv, r0, r1,
+                         offsetof(CPUTLBEntry, addend) -
+                         offsetof(CPUTLBEntry, addr_read));
+
+    tcg_out_qemu_ld_direct(s, data_reg, data_reg2, r0, 0, opc);
 
-    /* je label1 */
-    tcg_out8(s, OPC_JCC_short + JCC_JE);
-    label1_ptr = s->code_ptr;
+    /* jmp label2 */
+    tcg_out8(s, OPC_JMP_short);
+    label_ptr[2] = s->code_ptr;
     s->code_ptr++;
 
-    /* label3: */
-    *label3_ptr = s->code_ptr - label3_ptr - 1;
-#endif
+    /* TLB Miss.  */
+
+    /* label1: */
+    *label_ptr[0] = s->code_ptr - label_ptr[0] - 1;
+    if (TARGET_LONG_BITS == 64) {
+        *label_ptr[1] = s->code_ptr - label_ptr[1] - 1;
+    }
 
     /* XXX: move that code at the end of the TB */
 #if TARGET_LONG_BITS == 32
@@ -876,23 +885,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
         break;
     }
 
-    /* jmp label2 */
-    tcg_out8(s, OPC_JMP_short);
-    label2_ptr = s->code_ptr;
-    s->code_ptr++;
-
-    /* label1: */
-    *label1_ptr = s->code_ptr - label1_ptr - 1;
-
-    /* add x(r1), r0 */
-    tcg_out_modrm_offset(s, OPC_ADD_GvEv, r0, r1,
-                         offsetof(CPUTLBEntry, addend) -
-                         offsetof(CPUTLBEntry, addr_read));
-
-    tcg_out_qemu_ld_direct(s, data_reg, data_reg2, r0, 0, opc);
-
     /* label2: */
-    *label2_ptr = s->code_ptr - label2_ptr - 1;
+    *label_ptr[2] = s->code_ptr - label_ptr[2] - 1;
 #else
     tcg_out_qemu_ld_direct(s, data_reg, data_reg2, addr_reg, GUEST_BASE, opc);
 #endif
@@ -955,27 +949,22 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
 static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                             int opc)
 {
-    int addr_reg, data_reg, data_reg2, r0, r1, mem_index, s_bits;
+    int addr_reg, addr_reg2 = 0;
+    int data_reg, data_reg2 = 0;
+    int r0, r1, mem_index, s_bits;
 #if defined(CONFIG_SOFTMMU)
     int stack_adjust;
-    uint8_t *label1_ptr, *label2_ptr;
-#endif
-#if TARGET_LONG_BITS == 64
-#if defined(CONFIG_SOFTMMU)
-    uint8_t *label3_ptr;
-#endif
-    int addr_reg2;
+    uint8_t *label_ptr[3];
 #endif
 
     data_reg = *args++;
-    if (opc == 3)
+    if (opc == 3) {
         data_reg2 = *args++;
-    else
-        data_reg2 = 0;
+    }
     addr_reg = *args++;
-#if TARGET_LONG_BITS == 64
-    addr_reg2 = *args++;
-#endif
+    if (TARGET_LONG_BITS == 64) {
+        addr_reg2 = *args++;
+    }
     mem_index = *args;
 
     s_bits = opc;
@@ -1001,28 +990,42 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
 
     tcg_out_mov(s, r0, addr_reg);
 
-#if TARGET_LONG_BITS == 32
-    /* je label1 */
-    tcg_out8(s, OPC_JCC_short + JCC_JE);
-    label1_ptr = s->code_ptr;
-    s->code_ptr++;
-#else
-    /* jne label3 */
+    /* jne label1 */
     tcg_out8(s, OPC_JCC_short + JCC_JNE);
-    label3_ptr = s->code_ptr;
+    label_ptr[0] = s->code_ptr;
     s->code_ptr++;
 
-    /* cmp 4(r1), addr_reg2 */
-    tcg_out_modrm_offset(s, OPC_CMP_GvEv, addr_reg2, r1, 4);
+    if (TARGET_LONG_BITS == 64) {
+        /* cmp 4(r1), addr_reg2 */
+        tcg_out_modrm_offset(s, OPC_CMP_GvEv, addr_reg2, r1, 4);
+
+        /* jne label1 */
+        tcg_out8(s, OPC_JCC_short + JCC_JNE);
+        label_ptr[1] = s->code_ptr;
+        s->code_ptr++;
+    }
+
+    /* TLB Hit.  */
+
+    /* add x(r1), r0 */
+    tcg_out_modrm_offset(s, OPC_ADD_GvEv, r0, r1,
+                         offsetof(CPUTLBEntry, addend) -
+                         offsetof(CPUTLBEntry, addr_write));
+
+    tcg_out_qemu_st_direct(s, data_reg, data_reg2, r0, 0, opc);
 
-    /* je label1 */
-    tcg_out8(s, OPC_JCC_short + JCC_JE);
-    label1_ptr = s->code_ptr;
+    /* jmp label2 */
+    tcg_out8(s, OPC_JMP_short);
+    label_ptr[2] = s->code_ptr;
     s->code_ptr++;
 
-    /* label3: */
-    *label3_ptr = s->code_ptr - label3_ptr - 1;
-#endif
+    /* TLB Miss.  */
+
+    /* label1: */
+    *label_ptr[0] = s->code_ptr - label_ptr[0] - 1;
+    if (TARGET_LONG_BITS == 64) {
+        *label_ptr[1] = s->code_ptr - label_ptr[1] - 1;
+    }
 
     /* XXX: move that code at the end of the TB */
 #if TARGET_LONG_BITS == 32
@@ -1080,23 +1083,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
         tcg_out_addi(s, TCG_REG_ESP, stack_adjust);
     }
 
-    /* jmp label2 */
-    tcg_out8(s, OPC_JMP_short);
-    label2_ptr = s->code_ptr;
-    s->code_ptr++;
-
-    /* label1: */
-    *label1_ptr = s->code_ptr - label1_ptr - 1;
-
-    /* add x(r1), r0 */
-    tcg_out_modrm_offset(s, OPC_ADD_GvEv, r0, r1,
-                         offsetof(CPUTLBEntry, addend) -
-                         offsetof(CPUTLBEntry, addr_write));
-
-    tcg_out_qemu_st_direct(s, data_reg, data_reg2, r0, 0, opc);
-
     /* label2: */
-    *label2_ptr = s->code_ptr - label2_ptr - 1;
+    *label_ptr[2] = s->code_ptr - label_ptr[2] - 1;
 #else
     tcg_out_qemu_st_direct(s, data_reg, data_reg2, addr_reg, GUEST_BASE, opc);
 #endif
-- 
1.7.0.1


Reply via email to