On 20/5/26 19:23, Philippe Mathieu-Daudé wrote:
From: James Hilliard <[email protected]>
Implement the Octeon LA* read-modify-write atomic instruction family:
LAI/LAID, LAD/LADD, LAA/LAAD, LAS/LASD, LAC/LACD, and LAW/LAWD.
These operations are architecturally distinct from SAA/SAAD and are used
by existing Octeon user-mode code for atomic counters, bit operations,
and exchange-style updates.
Reviewed-by: Richard Henderson <[email protected]>
Signed-off-by: James Hilliard <[email protected]>
Signed-off-by: Philippe Mathieu-Daudé <[email protected]>
---
target/mips/tcg/octeon.decode | 17 +++++++++++
target/mips/tcg/octeon_translate.c | 49 ++++++++++++++++++++++++++++++
2 files changed, 66 insertions(+)
diff --git a/target/mips/tcg/octeon.decode b/target/mips/tcg/octeon.decode
index 2d02b4e0bc3..1e44c588dd6 100644
--- a/target/mips/tcg/octeon.decode
+++ b/target/mips/tcg/octeon.decode
@@ -68,6 +68,23 @@ V3MULU 011100 ..... ..... ..... 00000 010001 @r3
SAA 011100 ..... ..... 00000 00000 011000 @saa
SAAD 011100 ..... ..... 00000 00000 011001 @saa
+&la base rd
+&laa base add rd
+@la ...... base:5 ..... rd:5 ........... &la
+@laa ...... base:5 add:5 rd:5 ........... &laa
+LAI 011100 ..... 00000 ..... 00010 011111 @la
+LAID 011100 ..... 00000 ..... 00011 011111 @la
+LAD 011100 ..... 00000 ..... 00110 011111 @la
+LADD 011100 ..... 00000 ..... 00111 011111 @la
+LAA 011100 ..... ..... ..... 10010 011111 @laa
+LAAD 011100 ..... ..... ..... 10011 011111 @laa
+LAS 011100 ..... 00000 ..... 01010 011111 @la
+LASD 011100 ..... 00000 ..... 01011 011111 @la
+LAC 011100 ..... 00000 ..... 01110 011111 @la
+LACD 011100 ..... 00000 ..... 01111 011111 @la
+LAW 011100 ..... ..... ..... 10110 011111 @laa
+LAWD 011100 ..... ..... ..... 10111 011111 @laa
+
&zcb base
ZCB 011100 base:5 00000 00000 1110- 011111 &zcb
diff --git a/target/mips/tcg/octeon_translate.c b/target/mips/tcg/octeon_translate.c
index 90bd68cbf25..47d46999487 100644
--- a/target/mips/tcg/octeon_translate.c
+++ b/target/mips/tcg/octeon_translate.c
@@ -185,6 +185,55 @@ static bool trans_saa(DisasContext *ctx, arg_saa *a, MemOp mop)
TRANS(SAA, trans_saa, MO_32);
TRANS(SAAD, trans_saa, MO_64);
+typedef void AtomicThreeOpFn(TCGv_i64, TCGv_va, TCGv_i64, TCGArg, MemOp);
+/*
+ * do_atomic_ld: emit one atomic memory operation through @atomic_fn and
+ * copy the value it leaves in 'old' into a GPR.
+ *
+ * @atomic_fn: generator with the AtomicThreeOpFn signature; invoked as
+ *             atomic_fn(old, addr, value, ctx->mem_idx, amo).
+ * @base:      register index handed to gen_base_offset_addr() together
+ *             with a zero offset to form the address.
+ * @add_reg:   GPR index supplying the operand, or a negative value to
+ *             select the constant @imm instead.
+ * @rd:        GPR index that receives 'old' via gen_store_gpr().
+ * @imm:       constant operand; only read when @add_reg is negative.
+ * @mop:       MemOp bits from the caller (the TRANS users pass MO_SL or
+ *             MO_UQ); OR-ed with mo_endian(ctx) and MO_ALIGN.
+ *
+ * Always returns true.
+ */
+static bool do_atomic_ld(DisasContext *ctx, AtomicThreeOpFn *atomic_fn,
+ int base, int add_reg, int rd, int64_t imm, MemOp mop)
+{
+ TCGv_i64 addr = tcg_temp_new_i64();
+ TCGv_i64 old = tcg_temp_new_i64();
+ TCGv_i64 value;
+ MemOp amo = mo_endian(ctx) | mop | MO_ALIGN;
+ /* Address = base register, no displacement. */
+ gen_base_offset_addr(ctx, addr, base, 0);
+ /* Operand: a GPR when add_reg >= 0, otherwise the constant imm. */
+ if (add_reg >= 0) {
+ value = tcg_temp_new_i64();
+ gen_load_gpr(value, add_reg);
+ } else {
+ value = tcg_constant_i64(imm);
+ }
+ /* atomic_fn fills 'old', which is then written back to rd. */
+ atomic_fn(old, addr, value, ctx->mem_idx, amo);
+ gen_store_gpr(old, rd);
+ return true;
+}
+/*
+ * Constant-operand wrapper for arg_la instructions: forwards a->base and
+ * a->rd to do_atomic_ld() with add_reg = -1, which makes do_atomic_ld()
+ * use @imm as the operand. @fn and @mop are passed through unchanged.
+ */
+static bool do_atomic_la(DisasContext *ctx, arg_la *a, AtomicThreeOpFn *fn,
+ int64_t imm, MemOp mop)
+{
+ return do_atomic_ld(ctx, fn, a->base, -1, a->rd, imm, mop);
+}
+/*
+ * Register-operand wrapper for arg_laa instructions: forwards a->base,
+ * a->add and a->rd to do_atomic_ld(). a->add is decoded from a 5-bit
+ * field (see @laa in octeon.decode), so it is never negative and
+ * do_atomic_ld() takes the operand from that GPR; @imm is passed along
+ * but not consulted in that case.
+ */
+static bool do_atomic_laa(DisasContext *ctx, arg_laa *a, AtomicThreeOpFn *fn,
+ int64_t imm, MemOp mop)
+{
+ return do_atomic_ld(ctx, fn, a->base, a->add, a->rd, imm, mop);
+}
+/*
+ * One TRANS() line per LA* instruction. The arguments after the name are
+ * the helper, the TCG atomic generator, the immediate and the MemOp.
+ * NOTE(review): TRANS() is defined outside this hunk; presumably it
+ * emits trans_<NAME>() forwarding these arguments to the helper.
+ *
+ *   fetch_add, imm +1 / -1 : LAI, LAID / LAD, LADD
+ *   fetch_add, GPR a->add  : LAA, LAAD  (imm 0 is a placeholder)
+ *   xchg,      imm -1 / 0  : LAS, LASD / LAC, LACD
+ *   xchg,      GPR a->add  : LAW, LAWD  (imm 0 is a placeholder)
+ *
+ * Names without the trailing D use MO_SL, the *D names use MO_UQ.
+ */
+TRANS(LAI, do_atomic_la, tcg_gen_atomic_fetch_add_i64, 1, MO_SL);
+TRANS(LAID, do_atomic_la, tcg_gen_atomic_fetch_add_i64, 1, MO_UQ);
+TRANS(LAD, do_atomic_la, tcg_gen_atomic_fetch_add_i64, -1, MO_SL);
+TRANS(LADD, do_atomic_la, tcg_gen_atomic_fetch_add_i64, -1, MO_UQ);
+TRANS(LAA, do_atomic_laa, tcg_gen_atomic_fetch_add_i64, 0, MO_SL);
+TRANS(LAAD, do_atomic_laa, tcg_gen_atomic_fetch_add_i64, 0, MO_UQ);
+TRANS(LAS, do_atomic_la, tcg_gen_atomic_xchg_i64, -1, MO_SL);
+TRANS(LASD, do_atomic_la, tcg_gen_atomic_xchg_i64, -1, MO_UQ);
+TRANS(LAC, do_atomic_la, tcg_gen_atomic_xchg_i64, 0, MO_SL);
+TRANS(LACD, do_atomic_la, tcg_gen_atomic_xchg_i64, 0, MO_UQ);
+TRANS(LAW, do_atomic_laa, tcg_gen_atomic_xchg_i64, 0, MO_SL);
+TRANS(LAWD, do_atomic_laa, tcg_gen_atomic_xchg_i64, 0, MO_UQ);
I actually changed my mind and would go with this instead:
-- >8 --
+typedef void AtomicThreeOpFn(TCGv_i64, TCGv_va, TCGv_i64, TCGArg, MemOp);
+
+/*
+ * Constant-operand atomic: calls @atomic_fn with the constant @imm as
+ * the operand and stores the value it leaves in 'old' to GPR a->rd.
+ *
+ * @atomic_fn: generator with the AtomicThreeOpFn signature; invoked as
+ *             atomic_fn(old, addr, constant(imm), ctx->mem_idx, amo).
+ * @imm:       operand, materialised with tcg_constant_i64().
+ * @mop:       MemOp bits from the caller; OR-ed with mo_endian(ctx)
+ *             and MO_ALIGN.
+ *
+ * Always returns true.
+ */
+static bool do_atomic_la(DisasContext *ctx, arg_la *a,
+                         AtomicThreeOpFn *atomic_fn, int64_t imm,
+                         MemOp mop)
+{
+    TCGv_i64 addr = tcg_temp_new_i64();
+    TCGv_i64 old = tcg_temp_new_i64();
+    MemOp amo = mo_endian(ctx) | mop | MO_ALIGN;
+
+    /* Address = a->base register, no displacement. */
+    gen_base_offset_addr(ctx, addr, a->base, 0);
+
+    atomic_fn(old, addr, tcg_constant_i64(imm), ctx->mem_idx, amo);
+    gen_store_gpr(old, a->rd);
+    return true;
+}
+/*
+ * Register-operand atomic: the operand handed to @atomic_fn is loaded
+ * from GPR a->add. The address is formed from a->base with a zero
+ * offset, and the value left in 'old' is stored to GPR a->rd.
+ *
+ * @atomic_fn: generator with the AtomicThreeOpFn signature; invoked as
+ *             atomic_fn(old, addr, value, ctx->mem_idx, amo).
+ * @mop:       MemOp bits from the caller; OR-ed with mo_endian(ctx)
+ *             and MO_ALIGN.
+ *
+ * Always returns true.
+ */
+static bool do_atomic_laa(DisasContext *ctx, arg_laa *a,
+ AtomicThreeOpFn *atomic_fn, MemOp mop)
+{
+ TCGv_i64 addr = tcg_temp_new_i64();
+ TCGv_i64 old = tcg_temp_new_i64();
+ TCGv_i64 value = tcg_temp_new_i64();
+ MemOp amo = mo_endian(ctx) | mop | MO_ALIGN;
+ /* Address from a->base (offset 0), operand from GPR a->add. */
+ gen_base_offset_addr(ctx, addr, a->base, 0);
+ gen_load_gpr(value, a->add);
+ /* atomic_fn fills 'old', which is then written back to a->rd. */
+ atomic_fn(old, addr, value, ctx->mem_idx, amo);
+ gen_store_gpr(old, a->rd);
+ return true;
+}
+/*
+ * One TRANS() line per LA* instruction. do_atomic_la users pass
+ * (generator, immediate, MemOp); do_atomic_laa users pass
+ * (generator, MemOp) because their operand comes from GPR a->add.
+ * NOTE(review): TRANS() is defined outside this snippet; presumably it
+ * emits trans_<NAME>() forwarding these arguments to the helper.
+ *
+ *   fetch_add, imm +1 / -1 : LAI, LAID / LAD, LADD
+ *   fetch_add, GPR a->add  : LAA, LAAD
+ *   xchg,      imm -1 / 0  : LAS, LASD / LAC, LACD
+ *   xchg,      GPR a->add  : LAW, LAWD
+ *
+ * Names without the trailing D use MO_SL, the *D names use MO_UQ.
+ */
+TRANS(LAI, do_atomic_la, tcg_gen_atomic_fetch_add_i64, 1, MO_SL);
+TRANS(LAID, do_atomic_la, tcg_gen_atomic_fetch_add_i64, 1, MO_UQ);
+TRANS(LAD, do_atomic_la, tcg_gen_atomic_fetch_add_i64, -1, MO_SL);
+TRANS(LADD, do_atomic_la, tcg_gen_atomic_fetch_add_i64, -1, MO_UQ);
+TRANS(LAA, do_atomic_laa, tcg_gen_atomic_fetch_add_i64, MO_SL);
+TRANS(LAAD, do_atomic_laa, tcg_gen_atomic_fetch_add_i64, MO_UQ);
+TRANS(LAS, do_atomic_la, tcg_gen_atomic_xchg_i64, -1, MO_SL);
+TRANS(LASD, do_atomic_la, tcg_gen_atomic_xchg_i64, -1, MO_UQ);
+TRANS(LAC, do_atomic_la, tcg_gen_atomic_xchg_i64, 0, MO_SL);
+TRANS(LACD, do_atomic_la, tcg_gen_atomic_xchg_i64, 0, MO_UQ);
+TRANS(LAW, do_atomic_laa, tcg_gen_atomic_xchg_i64, MO_SL);
+TRANS(LAWD, do_atomic_laa, tcg_gen_atomic_xchg_i64, MO_UQ);
---