Implement the strex and ldrex instructions relying on TCG's qemu_ldlink and
qemu_stcond ops.  For the time being, only the 32-bit instructions are
supported.
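
As a rough illustration (register numbers are hypothetical and the TCG op
arguments are abbreviated), a guest sequence such as

    ldrex r2, [r0]
    strex r3, r1, [r0]

is now translated along the lines of

    qemu_ldlink_i32  tmp, addr, mem_idx            /* marks addr exclusive */
    qemu_stcond_i32  is_dirty, val, addr, mem_idx  /* 0 = success, 1 = fail */

with is_dirty then copied into the guest Rd, matching the 0/1 status value
that strex writes back.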

Suggested-by: Jani Kokkonen <jani.kokko...@huawei.com>
Suggested-by: Claudio Fontana <claudio.font...@huawei.com>
Signed-off-by: Alvise Rigo <a.r...@virtualopensystems.com>
---
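Note: the slow paths below rely on the LoadLink/StoreConditional helpers
introduced earlier in this series.  As a sketch under that assumption, their
prototypes are expected to mirror the existing ld/st helpers, except that the
store-conditional one returns its success flag (which the backend then copies
out of R0):

    /* assumed prototypes, defined in an earlier patch of the series */
    uint32_t helper_le_ldlinkul_mmu(CPUArchState *env, target_ulong addr,
                                    int mmu_idx, uintptr_t retaddr);
    uint32_t helper_le_stcondl_mmu(CPUArchState *env, target_ulong addr,
                                   uint32_t val, int mmu_idx, uintptr_t retaddr);

This is why tcg_out_qemu_st_slow_path uses tcg_out_call followed by a
tcg_out_mov_reg from TCG_REG_R0, rather than the usual tail call.
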
 target-arm/translate.c |  76 ++++++++++++++++++++++++++++++++++-
 tcg/arm/tcg-target.c   | 105 ++++++++++++++++++++++++++++++++++++-------------
 2 files changed, 152 insertions(+), 29 deletions(-)

diff --git a/target-arm/translate.c b/target-arm/translate.c
index 9116529..3b209a5 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -956,6 +956,18 @@ DO_GEN_ST(8, MO_UB)
 DO_GEN_ST(16, MO_TEUW)
 DO_GEN_ST(32, MO_TEUL)
 
+/* Load/Store exclusive generators (always unsigned) */
+static inline void gen_aa32_ldex32(TCGv_i32 val, TCGv_i32 addr, int index)
+{
+    tcg_gen_qemu_ldlink_i32(val, addr, index, MO_TEUL);
+}
+
+static inline void gen_aa32_stex32(TCGv_i32 is_excl, TCGv_i32 val,
+                                   TCGv_i32 addr, int index)
+{
+    tcg_gen_qemu_stcond_i32(is_excl, val, addr, index, MO_TEUL);
+}
+
 static inline void gen_set_pc_im(DisasContext *s, target_ulong val)
 {
     tcg_gen_movi_i32(cpu_R[15], val);
@@ -7420,6 +7432,26 @@ static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
     tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
 }
 
+static void gen_load_exclusive_multi(DisasContext *s, int rt, int rt2,
+                                     TCGv_i32 addr, int size)
+{
+    TCGv_i32 tmp = tcg_temp_new_i32();
+
+    switch (size) {
+    case 0:
+    case 1:
+        abort();
+    case 2:
+        gen_aa32_ldex32(tmp, addr, get_mem_index(s));
+        break;
+    case 3:
+    default:
+        abort();
+    }
+
+    store_reg(s, rt, tmp);
+}
+
 static void gen_clrex(DisasContext *s)
 {
     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
@@ -7518,6 +7550,45 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
     gen_set_label(done_label);
     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
 }
+
+static void gen_store_exclusive_multi(DisasContext *s, int rd, int rt, int rt2,
+                                      TCGv_i32 addr, int size)
+{
+    TCGv_i32 tmp;
+    TCGv_i32 is_dirty;
+    TCGLabel *done_label;
+    TCGLabel *fail_label;
+
+    fail_label = gen_new_label();
+    done_label = gen_new_label();
+
+    tmp = load_reg(s, rt);
+    is_dirty = tcg_temp_new_i32();
+    switch (size) {
+    case 0:
+    case 1:
+        abort();
+        break;
+    case 2:
+        gen_aa32_stex32(is_dirty, tmp, addr, get_mem_index(s));
+        break;
+    case 3:
+    default:
+        abort();
+    }
+
+    tcg_temp_free_i32(tmp);
+
+    /* Check if the store conditional has to fail */
+    tcg_gen_brcondi_i32(TCG_COND_EQ, is_dirty, 1, fail_label);
+    tcg_temp_free_i32(is_dirty);
+
+    tcg_gen_movi_i32(cpu_R[rd], 0); /* is_dirty = 0 */
+    tcg_gen_br(done_label);
+    gen_set_label(fail_label);
+    tcg_gen_movi_i32(cpu_R[rd], 1); /* is_dirty = 1 */
+    gen_set_label(done_label);
+}
 #endif
 
 /* gen_srs:
@@ -8365,7 +8436,7 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn)
                         } else if (insn & (1 << 20)) {
                             switch (op1) {
                             case 0: /* ldrex */
-                                gen_load_exclusive(s, rd, 15, addr, 2);
+                                gen_load_exclusive_multi(s, rd, 15, addr, 2);
                                 break;
                             case 1: /* ldrexd */
                                 gen_load_exclusive(s, rd, rd + 1, addr, 3);
@@ -8383,7 +8454,8 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn)
                             rm = insn & 0xf;
                             switch (op1) {
                             case 0:  /*  strex */
-                                gen_store_exclusive(s, rd, rm, 15, addr, 2);
+                                gen_store_exclusive_multi(s, rd, rm, 15,
+                                                          addr, 2);
                                 break;
                             case 1: /*  strexd */
                                 gen_store_exclusive(s, rd, rm, rm + 1, addr, 3);
diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index 01e6fbf..1a301be 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -1069,6 +1069,11 @@ static void * const qemu_ld_helpers[16] = {
     [MO_BESL] = helper_be_ldul_mmu,
 };
 
+/* LoadLink helpers, only unsigned */
+static void * const qemu_ldex_helpers[16] = {
+    [MO_LEUL] = helper_le_ldlinkul_mmu,
+};
+
 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
  *                                     uintxx_t val, int mmu_idx, uintptr_t ra)
  */
@@ -1082,6 +1087,11 @@ static void * const qemu_st_helpers[16] = {
     [MO_BEQ]  = helper_be_stq_mmu,
 };
 
+/* StoreConditional helpers, only unsigned */
+static void * const qemu_stex_helpers[16] = {
+    [MO_LEUL] = helper_le_stcondl_mmu,
+};
+
 /* Helper routines for marshalling helper function arguments into
  * the correct registers and stack.
  * argreg is where we want to put this argument, arg is the argument itself.
@@ -1222,6 +1232,7 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
    path for a load or store, so that we can later generate the correct
    helper code.  */
 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOp opc,
+                                bool is_excl, TCGReg llsc_success,
                                 TCGReg datalo, TCGReg datahi, TCGReg addrlo,
                                 TCGReg addrhi, int mem_index,
                                 tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
@@ -1229,6 +1240,8 @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOp opc,
     TCGLabelQemuLdst *label = new_ldst_label(s);
 
     label->is_ld = is_ld;
+    label->is_llsc = is_excl;
+    label->llsc_success = llsc_success;
     label->opc = opc;
     label->datalo_reg = datalo;
     label->datahi_reg = datahi;
@@ -1259,12 +1272,16 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
     /* For armv6 we can use the canonical unsigned helpers and minimize
        icache usage.  For pre-armv6, use the signed helpers since we do
        not have a single insn sign-extend.  */
-    if (use_armv6_instructions) {
-        func = qemu_ld_helpers[opc & ~MO_SIGN];
+    if (lb->is_llsc) {
+        func = qemu_ldex_helpers[opc];
     } else {
-        func = qemu_ld_helpers[opc];
-        if (opc & MO_SIGN) {
-            opc = MO_UL;
+        if (use_armv6_instructions) {
+            func = qemu_ld_helpers[opc & ~MO_SIGN];
+        } else {
+            func = qemu_ld_helpers[opc];
+            if (opc & MO_SIGN) {
+                opc = MO_UL;
+            }
         }
     }
     tcg_out_call(s, func);
@@ -1336,7 +1353,14 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
     argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
 
     /* Tail-call to the helper, which will return to the fast path.  */
-    tcg_out_goto(s, COND_AL, qemu_st_helpers[opc]);
+    if (lb->is_llsc) {
+        tcg_out_call(s, qemu_stex_helpers[opc]);
+        /* Save the output of the StoreConditional */
+        tcg_out_mov_reg(s, COND_AL, lb->llsc_success, TCG_REG_R0);
+        tcg_out_goto(s, COND_AL, lb->raddr);
+    } else {
+        tcg_out_goto(s, COND_AL, qemu_st_helpers[opc]);
+    }
 }
 #endif /* SOFTMMU */
 
@@ -1460,7 +1484,8 @@ static inline void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp opc,
     }
 }
 
-static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
+static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64,
+                            bool isLoadLink)
 {
     TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
     TCGMemOp opc;
@@ -1478,17 +1503,23 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
 
 #ifdef CONFIG_SOFTMMU
     mem_index = *args;
-    addend = tcg_out_tlb_read(s, addrlo, addrhi, opc & MO_SIZE, mem_index, 1);
 
     /* This a conditional BL only to load a pointer within this opcode into LR
-       for the slow path.  We will not be using the value for a tail call.  */
-    label_ptr = s->code_ptr;
-    tcg_out_bl_noaddr(s, COND_NE);
-
-    tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend);
+       for the slow path.  We will not be using the value for a tail call.
+       In the context of a LoadLink instruction, we don't check the TLB but we
+       always follow the slow path.  */
+    if (isLoadLink) {
+        label_ptr = s->code_ptr;
+        tcg_out_bl_noaddr(s, COND_AL);
+    } else {
+        addend = tcg_out_tlb_read(s, addrlo, addrhi, opc & MO_SIZE, mem_index, 1);
+        label_ptr = s->code_ptr;
+        tcg_out_bl_noaddr(s, COND_NE);
+        tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend);
+    }
 
-    add_qemu_ldst_label(s, true, opc, datalo, datahi, addrlo, addrhi,
-                        mem_index, s->code_ptr, label_ptr);
+    add_qemu_ldst_label(s, true, opc, isLoadLink, 0, datalo, datahi, addrlo,
+                        addrhi, mem_index, s->code_ptr, label_ptr);
 #else /* !CONFIG_SOFTMMU */
     if (GUEST_BASE) {
         tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, GUEST_BASE);
@@ -1589,9 +1620,11 @@ static inline void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp opc,
     }
 }
 
-static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
+static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64,
+                            bool isStoreCond)
 {
     TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
+    TCGReg llsc_success;
     TCGMemOp opc;
 #ifdef CONFIG_SOFTMMU
     int mem_index;
@@ -1599,6 +1632,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
     tcg_insn_unit *label_ptr;
 #endif
 
+    llsc_success = (isStoreCond ? *args++ : 0);
+
     datalo = *args++;
     datahi = (is64 ? *args++ : 0);
     addrlo = *args++;
@@ -1607,16 +1642,24 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
 
 #ifdef CONFIG_SOFTMMU
     mem_index = *args;
-    addend = tcg_out_tlb_read(s, addrlo, addrhi, opc & MO_SIZE, mem_index, 0);
 
-    tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi, addrlo, addend);
+    if (isStoreCond) {
+        /* Always follow the slow-path for an exclusive access */
+        label_ptr = s->code_ptr;
+        tcg_out_bl_noaddr(s, COND_AL);
+    } else {
+        addend = tcg_out_tlb_read(s, addrlo, addrhi, opc & MO_SIZE, mem_index, 0);
 
-    /* The conditional call must come last, as we're going to return here.  */
-    label_ptr = s->code_ptr;
-    tcg_out_bl_noaddr(s, COND_NE);
+        tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi, addrlo, addend);
 
-    add_qemu_ldst_label(s, false, opc, datalo, datahi, addrlo, addrhi,
-                        mem_index, s->code_ptr, label_ptr);
+        /* The conditional call must come last, as we're going to return here.  */
+        label_ptr = s->code_ptr;
+        tcg_out_bl_noaddr(s, COND_NE);
+    }
+
+    add_qemu_ldst_label(s, false, opc, isStoreCond, llsc_success, datalo,
+                        datahi, addrlo, addrhi, mem_index, s->code_ptr,
+                        label_ptr);
 #else /* !CONFIG_SOFTMMU */
     if (GUEST_BASE) {
         tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, GUEST_BASE);
@@ -1859,16 +1902,22 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
 
     case INDEX_op_qemu_ld_i32:
-        tcg_out_qemu_ld(s, args, 0);
+        tcg_out_qemu_ld(s, args, 0, 0);
+        break;
+    case INDEX_op_qemu_ldlink_i32:
+        tcg_out_qemu_ld(s, args, 0, 1); /* LoadLink */
         break;
     case INDEX_op_qemu_ld_i64:
-        tcg_out_qemu_ld(s, args, 1);
+        tcg_out_qemu_ld(s, args, 1, 0);
         break;
     case INDEX_op_qemu_st_i32:
-        tcg_out_qemu_st(s, args, 0);
+        tcg_out_qemu_st(s, args, 0, 0);
+        break;
+    case INDEX_op_qemu_stcond_i32:
+        tcg_out_qemu_st(s, args, 0, 1); /* StoreConditional */
         break;
     case INDEX_op_qemu_st_i64:
-        tcg_out_qemu_st(s, args, 1);
+        tcg_out_qemu_st(s, args, 1, 0);
         break;
 
     case INDEX_op_bswap16_i32:
@@ -1952,8 +2001,10 @@ static const TCGTargetOpDef arm_op_defs[] = {
 
 #if TARGET_LONG_BITS == 32
     { INDEX_op_qemu_ld_i32, { "r", "l" } },
+    { INDEX_op_qemu_ldlink_i32, { "r", "l" } },
     { INDEX_op_qemu_ld_i64, { "r", "r", "l" } },
     { INDEX_op_qemu_st_i32, { "s", "s" } },
+    { INDEX_op_qemu_stcond_i32, { "r", "s", "s" } },
     { INDEX_op_qemu_st_i64, { "s", "s", "s" } },
 #else
     { INDEX_op_qemu_ld_i32, { "r", "l", "l" } },
-- 
2.4.0

