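Implement the Octeon LA* read-modify-write atomic instruction family: LAI/LAID (add 1), LAD/LADD (add -1), LAA/LAAD (add a register), LAS/LASD (store all ones), LAC/LACD (store zero), and LAW/LAWD (store a register). Each instruction atomically updates the naturally aligned word or doubleword at the address held in the base register and writes the previous memory contents to rd; the word forms sign-extend that value, and the D-suffixed forms operate on doublewords.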
These operations are architecturally distinct from SAA/SAAD and are used by existing Octeon user-mode code for atomic counters, bit operations, and exchange-style updates. Signed-off-by: James Hilliard <[email protected]> --- Changes v1 -> v2: - Keep LA* atomics naturally aligned per Octeon L2 transaction semantics. - Use explicit i64 TCG ops in the LA* translator paths. (suggested by Philippe Mathieu-Daudé) Changes v2 -> v3: - Drop redundant TARGET_LONG_BITS guards from doubleword atomic paths. (suggested by Richard Henderson) - Group LA* translator wrappers by argument shape instead of adding one wrapper per instruction. (suggested by Richard Henderson) Changes v3 -> v4: - Use i64 atomic helpers for both word and doubleword paths and select word sign-extension through MO_SL. (suggested by Richard Henderson) Changes v5 -> v6: - Rename the shared translator helpers to distinguish fetch-add and exchange operations. --- target/mips/tcg/octeon.decode | 17 +++++++++ target/mips/tcg/octeon_translate.c | 74 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) diff --git a/target/mips/tcg/octeon.decode b/target/mips/tcg/octeon.decode index 989762fffc..ee1d636e4e 100644 --- a/target/mips/tcg/octeon.decode +++ b/target/mips/tcg/octeon.decode @@ -64,6 +64,23 @@ V3MULU 011100 ..... ..... ..... 00000 010001 @r3 SAA 011100 ..... ..... 00000 00000 011000 @saa SAAD 011100 ..... ..... 00000 00000 011001 @saa +&la base rd +&laa base add rd +@la ...... base:5 ..... rd:5 ........... &la +@laa ...... base:5 add:5 rd:5 ........... &laa +LAI 011100 ..... 00000 ..... 00010 011111 @la +LAID 011100 ..... 00000 ..... 00011 011111 @la +LAD 011100 ..... 00000 ..... 00110 011111 @la +LADD 011100 ..... 00000 ..... 00111 011111 @la +LAA 011100 ..... ..... ..... 10010 011111 @laa +LAAD 011100 ..... ..... ..... 10011 011111 @laa +LAS 011100 ..... 00000 ..... 01010 011111 @la +LASD 011100 ..... 00000 ..... 01011 011111 @la +LAC 011100 ..... 00000 ..... 
01110 011111 @la +LACD 011100 ..... 00000 ..... 01111 011111 @la +LAW 011100 ..... ..... ..... 10110 011111 @laa +LAWD 011100 ..... ..... ..... 10111 011111 @laa + &zcb base ZCB 011100 base:5 00000 00000 1110- 011111 &zcb diff --git a/target/mips/tcg/octeon_translate.c b/target/mips/tcg/octeon_translate.c index f5c976db0d..b4a4243560 100644 --- a/target/mips/tcg/octeon_translate.c +++ b/target/mips/tcg/octeon_translate.c @@ -195,6 +195,68 @@ TRANS(SAAD, trans_saa, MO_64); TRANS(QMAC, trans_qmac, gen_helper_octeon_qmac); TRANS(QMACS, trans_qmac, gen_helper_octeon_qmacs); +static bool trans_la_fetch_add(DisasContext *ctx, int base, int add_reg, + int rd, int64_t imm, MemOp mop) +{ + TCGv_i64 addr = tcg_temp_new_i64(); + TCGv_i64 value = tcg_temp_new_i64(); + TCGv_i64 old = tcg_temp_new_i64(); + MemOp amo = mo_endian(ctx) | mop | MO_ALIGN; + + gen_base_offset_addr(ctx, addr, base, 0); + + if (add_reg >= 0) { + gen_load_gpr(value, add_reg); + } else { + tcg_gen_movi_i64(value, imm); + } + + tcg_gen_atomic_fetch_add_i64(old, addr, value, ctx->mem_idx, amo); + gen_store_gpr(old, rd); + return true; +} + +static bool trans_la_xchg(DisasContext *ctx, int base, int add_reg, int rd, + int64_t imm, MemOp mop) +{ + TCGv_i64 addr = tcg_temp_new_i64(); + TCGv_i64 value = tcg_temp_new_i64(); + TCGv_i64 old = tcg_temp_new_i64(); + MemOp amo = mo_endian(ctx) | mop | MO_ALIGN; + + gen_base_offset_addr(ctx, addr, base, 0); + + if (add_reg >= 0) { + gen_load_gpr(value, add_reg); + } else { + tcg_gen_movi_i64(value, imm); + } + + tcg_gen_atomic_xchg_i64(old, addr, value, ctx->mem_idx, amo); + gen_store_gpr(old, rd); + return true; +} + +static bool do_la_imm_add(DisasContext *ctx, arg_la *a, int64_t imm, MemOp mop) +{ + return trans_la_fetch_add(ctx, a->base, -1, a->rd, imm, mop); +} + +static bool do_la_reg_add(DisasContext *ctx, arg_laa *a, MemOp mop) +{ + return trans_la_fetch_add(ctx, a->base, a->add, a->rd, 0, mop); +} + +static bool do_la_imm_xchg(DisasContext *ctx, arg_la *a, 
int64_t imm, MemOp mop) +{ + return trans_la_xchg(ctx, a->base, -1, a->rd, imm, mop); +} + +static bool do_la_reg_xchg(DisasContext *ctx, arg_laa *a, MemOp mop) +{ + return trans_la_xchg(ctx, a->base, a->add, a->rd, 0, mop); +} + static bool trans_ZCB(DisasContext *ctx, arg_ZCB *a) { TCGv_i64 addr = tcg_temp_new_i64(); @@ -319,6 +381,18 @@ static bool trans_vmul(DisasContext *ctx, arg_decode_ext_octeon1 *a, return true; } +TRANS(LAI, do_la_imm_add, 1, MO_SL); +TRANS(LAID, do_la_imm_add, 1, MO_UQ); +TRANS(LAD, do_la_imm_add, -1, MO_SL); +TRANS(LADD, do_la_imm_add, -1, MO_UQ); +TRANS(LAA, do_la_reg_add, MO_SL); +TRANS(LAAD, do_la_reg_add, MO_UQ); +TRANS(LAS, do_la_imm_xchg, -1, MO_SL); +TRANS(LASD, do_la_imm_xchg, -1, MO_UQ); +TRANS(LAC, do_la_imm_xchg, 0, MO_SL); +TRANS(LACD, do_la_imm_xchg, 0, MO_UQ); +TRANS(LAW, do_la_reg_xchg, MO_SL); +TRANS(LAWD, do_la_reg_xchg, MO_UQ); TRANS(LBX, trans_lx, MO_SB); TRANS(LBUX, trans_lx, MO_UB); TRANS(LHX, trans_lx, MO_SW); -- 2.54.0
