From: Christian Svensson <b...@cmd.nu>
This patch adds support for atomic locks (the l.lwa and l.swa instructions)
and is an adaptation of
https://github.com/bluecmd/or1k-qemu/commits/or32-optimize

Tested with the atomic lock implementation of musl.
Signed-off-by: Christian Svensson <b...@cmd.nu>
Signed-off-by: Sebastian Macke <sebast...@macke.de>
---
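Not part of the patch, but for review context: a rough C model of the guest-side
pattern that musl's lock code relies on. The names below are invented; musl's
real implementation is inline assembly that branches on SR_F.

    /* Illustrative model only: l.lwa loads a word and takes a reservation,
     * l.swa stores only if the reservation still holds and reports the
     * outcome in SR_F.  In this single-threaded C model the reservation can
     * never be lost, so model_swa() always succeeds. */
    #include <stdint.h>
    #include <stdbool.h>

    static uint32_t model_lwa(volatile uint32_t *p)          { return *p; }
    static bool model_swa(volatile uint32_t *p, uint32_t v)  { *p = v; return true; }

    /* Roughly what a libc-level compare-and-swap does with the two opcodes. */
    static uint32_t model_cas(volatile uint32_t *p, uint32_t expect, uint32_t desired)
    {
        uint32_t old;
        do {
            old = model_lwa(p);           /* l.lwa: load word, take the lock */
            if (old != expect) {
                return old;               /* mismatch: no store is attempted */
            }
        } while (!model_swa(p, desired)); /* l.swa: retry if SR_F is clear */
        return old;
    }

The only property QEMU has to provide is that l.swa fails whenever the word
could have changed since the matching l.lwa.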
target-openrisc/cpu.h | 3 ++
target-openrisc/interrupt.c | 3 ++
target-openrisc/translate.c | 77 ++++++++++++++++++++++++++++++++++++++++++---
3 files changed, 79 insertions(+), 4 deletions(-)
diff --git a/target-openrisc/cpu.h b/target-openrisc/cpu.h
index 69b96c6..abdba75 100644
--- a/target-openrisc/cpu.h
+++ b/target-openrisc/cpu.h
@@ -302,6 +302,9 @@ typedef struct CPUOpenRISCState {
in solt so far. */
uint32_t btaken; /* the SR_F bit */
+ target_ulong lock_addr; /* Atomicity lock address. */
+ target_ulong lock_value; /* Atomicity lock value. */
+
CPU_COMMON
/* Fields from here on are preserved across CPU reset. */
diff --git a/target-openrisc/interrupt.c b/target-openrisc/interrupt.c
index e480cfd..68d554c 100644
--- a/target-openrisc/interrupt.c
+++ b/target-openrisc/interrupt.c
@@ -54,6 +54,9 @@ void openrisc_cpu_do_interrupt(CPUState *cs)
env->tlb->cpu_openrisc_map_address_data = &cpu_openrisc_get_phys_nommu;
env->tlb->cpu_openrisc_map_address_code = &cpu_openrisc_get_phys_nommu;
+ /* invalidate any atomicity lock held before the exception */
+ env->lock_addr = -1;
+
if (cs->exception_index > 0 && cs->exception_index < EXCP_NR) {
env->pc = (cs->exception_index << 8);
} else {
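Review note (not part of the patch): clearing the lock here is what makes an
l.lwa/l.swa pair safe against exceptions delivered in between. A minimal
sketch of the intended behaviour, reusing the -1 "no lock" convention from
cpu.h; the names are invented:

    #include <stdint.h>
    #include <assert.h>

    static uint32_t lock_addr = (uint32_t)-1;    /* -1 means "no lock held" */

    static void model_lwa(uint32_t addr)   { lock_addr = addr; }
    static void model_interrupt(void)      { lock_addr = (uint32_t)-1; }
    static int model_swa_ok(uint32_t addr) { return lock_addr == addr; }

    int main(void)
    {
        model_lwa(0x1000);             /* guest: l.lwa r3, 0(r4)           */
        model_interrupt();             /* host: exception taken in between */
        assert(!model_swa_ok(0x1000)); /* guest: l.swa must fail and retry */
        return 0;
    }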
diff --git a/target-openrisc/translate.c b/target-openrisc/translate.c
index 543aa67..6401b4b 100644
--- a/target-openrisc/translate.c
+++ b/target-openrisc/translate.c
@@ -55,6 +55,8 @@ typedef struct DisasContext {
static TCGv_ptr cpu_env;
static TCGv cpu_sr;
static TCGv cpu_R[32];
+static TCGv cpu_lock_addr;
+static TCGv cpu_lock_value;
static TCGv cpu_pc;
static TCGv jmp_pc; /* l.jr/l.jalr temp pc */
static TCGv cpu_npc;
@@ -82,6 +84,12 @@ void openrisc_translate_init(void)
env_flags = tcg_global_mem_new_i32(TCG_AREG0,
offsetof(CPUOpenRISCState, flags),
"flags");
+ cpu_lock_addr = tcg_global_mem_new(TCG_AREG0,
+ offsetof(CPUOpenRISCState, lock_addr),
+ "lock_addr");
+ cpu_lock_value = tcg_global_mem_new(TCG_AREG0,
+ offsetof(CPUOpenRISCState, lock_value),
+ "lock_value");
cpu_pc = tcg_global_mem_new(TCG_AREG0,
offsetof(CPUOpenRISCState, pc), "pc");
cpu_npc = tcg_global_mem_new(TCG_AREG0,
@@ -254,17 +262,67 @@ static void gen_jump(DisasContext *dc, uint32_t imm,
uint32_t reg, uint32_t op0)
gen_sync_flags(dc);
}
+/* According to the OpenRISC specification the atomic lock should be
+ * poisoned by any other store to the same address.  For speed, and because
+ * the emulation is single-threaded, we do not track ordinary stores: the
+ * lock is only set by an atomic load, consumed by an atomic store and
+ * cleared when an exception is taken.
+ *
+ * To still catch an ordinary store that changes the locked word, we record
+ * the *value* at the locked address and recheck it before the swa commits. */
+
+static void gen_atomic_load(TCGv tD, TCGv t0, DisasContext *dc)
+{
+ tcg_gen_qemu_ld_tl(tD, t0, dc->mem_idx, MO_TEUL);
+ tcg_gen_mov_tl(cpu_lock_addr, t0);
+ tcg_gen_mov_tl(cpu_lock_value, tD);
+}
+
+static void gen_atomic_store(TCGv tB, TCGv t0, DisasContext *dc)
+{
+ TCGv val = tcg_temp_new();
+ int store_fail;
+ int store_done;
+
+ store_fail = gen_new_label();
+ store_done = gen_new_label();
+
+ /* check that the lock still covers this address */
+ tcg_gen_brcond_tl(TCG_COND_NE, t0, cpu_lock_addr, store_fail);
+
+ /* check that the word still holds the value the atomic load saw */
+ tcg_gen_qemu_ld_tl(val, t0, dc->mem_idx, MO_TEUL);
+ tcg_gen_brcond_tl(TCG_COND_NE, val, cpu_lock_value, store_fail);
+ tcg_temp_free(val);
+
+ /* success of the atomic access: commit the store and set SR_F */
+ tcg_gen_qemu_st_tl(tB, t0, dc->mem_idx, MO_TEUL);
+ tcg_gen_ori_tl(cpu_sr, cpu_sr, SR_F);
+ tcg_gen_br(store_done);
+
+ gen_set_label(store_fail);
+ tcg_gen_andi_tl(cpu_sr, cpu_sr, ~SR_F);
+
+ gen_set_label(store_done);
+ /* the atomic store consumes the lock whether it succeeded or not */
+ tcg_gen_movi_tl(cpu_lock_addr, -1);
+}
+
static void gen_loadstore(DisasContext *dc, uint32 op0,
uint32_t ra, uint32_t rb, uint32_t rd,
uint32_t offset)
{
TCGv t0 = cpu_R[ra];
if (offset != 0) {
- t0 = tcg_temp_new();
+ t0 = tcg_temp_local_new();
tcg_gen_addi_tl(t0, cpu_R[ra], sign_extend(offset, 16));
}
switch (op0) {
+ case 0x1b: /* l.lwa */
+ gen_atomic_load(cpu_R[rd], t0, dc);
+ break;
+
case 0x21: /* l.lwz */
tcg_gen_qemu_ld_tl(cpu_R[rd], t0, dc->mem_idx, MO_TEUL);
break;
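For reviewers, a plain-C model of the run-time effect of the ops emitted by
gen_atomic_load()/gen_atomic_store() above. Guest memory is reduced to a word
array, the names are invented and the SR_F bit position is only illustrative:

    #include <stdint.h>

    #define MODEL_SR_F (1u << 9)          /* illustrative flag bit */

    struct model_cpu {
        uint32_t sr;
        uint32_t lock_addr;               /* (uint32_t)-1: no lock held */
        uint32_t lock_value;
    };

    /* Effect of the code emitted by gen_atomic_load() for l.lwa. */
    static uint32_t model_lwa(struct model_cpu *cpu, uint32_t *mem, uint32_t addr)
    {
        uint32_t val = mem[addr / 4];
        cpu->lock_addr = addr;
        cpu->lock_value = val;
        return val;
    }

    /* Effect of the code emitted by gen_atomic_store() for l.swa. */
    static void model_swa(struct model_cpu *cpu, uint32_t *mem, uint32_t addr, uint32_t val)
    {
        if (addr == cpu->lock_addr && mem[addr / 4] == cpu->lock_value) {
            mem[addr / 4] = val;          /* commit the store */
            cpu->sr |= MODEL_SR_F;        /* success */
        } else {
            cpu->sr &= ~MODEL_SR_F;       /* failure: the guest retries */
        }
        cpu->lock_addr = (uint32_t)-1;    /* the lock is consumed either way */
    }

Checking the value as well as the address is what lets an ordinary store that
changes the word break the lock without having to instrument every store.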
@@ -289,6 +347,10 @@ static void gen_loadstore(DisasContext *dc, uint32 op0,
tcg_gen_qemu_ld_tl(cpu_R[rd], t0, dc->mem_idx, MO_TESW);
break;
+ case 0x33: /* l.swa */
+ gen_atomic_store(cpu_R[rb], t0, dc);
+ break;
+
case 0x35: /* l.sw */
tcg_gen_qemu_st_tl(cpu_R[rb], t0, dc->mem_idx, MO_TEUL);
break;
@@ -759,9 +821,6 @@ static void dec_calc(DisasContext *dc, uint32_t insn)
}
}
-
-
-