On Fri, 2023-06-02 at 23:04 +0800, Weiwei Li wrote: > > On 2023/6/2 20:16, Rob Bradford wrote: > > This commit adds support for the amocas.{w,d,q} instructions > > behind > > a new property to enable that instruction. > > > > Signed-off-by: Rob Bradford <rbradf...@rivosinc.com> > > --- > > I also implemented an initial version for this extension without any > tests a few days ago. > > You can find it in > https://github.com/plctlab/plct-qemu/tree/plct-zacas-dev.
Hi, thank you for sharing this! Sorry for the delayed response. I had a chance to test your version and found a small issue with the .Q implementation. Here is the fix: diff --git a/target/riscv/insn_trans/trans_rvzacas.c.inc b/target/riscv/insn_trans/trans_rvzacas.c.inc index c063079c6a..e556f48da8 100644 --- a/target/riscv/insn_trans/trans_rvzacas.c.inc +++ b/target/riscv/insn_trans/trans_rvzacas.c.inc @@ -129,11 +129,12 @@ static bool trans_amocas_q(DisasContext *ctx, arg_amocas_q *a) TCGv_i128 src2 = tcg_temp_new_i128(); TCGv_i64 src2l = get_gpr(ctx, a->rs2, EXT_NONE); TCGv_i64 src2h = get_gpr(ctx, a->rs2 == 0 ? 0 : a->rs2 + 1, EXT_NONE); - TCGv_i64 destl = get_gpr(ctx, a->rs2, EXT_NONE); - TCGv_i64 desth = get_gpr(ctx, a->rs2 == 0 ? 0 : a->rs2 + 1, EXT_NONE); + TCGv_i64 destl = get_gpr(ctx, a->rd, EXT_NONE); + TCGv_i64 desth = get_gpr(ctx, a->rd == 0 ? 0 : a->rd + 1, EXT_NONE); tcg_gen_concat_i64_i128(src2, src2l, src2h); tcg_gen_concat_i64_i128(dest, destl, desth); + decode_save_opc(ctx); tcg_gen_atomic_cmpxchg_i128(dest, src1, dest, src2, ctx->mem_idx, (MO_ALIGN | MO_TEUO)); Since you already have an implementation ready, I'm happy for your version to be the one included. > > Hope it can help you. 
> > > target/riscv/cpu.c | 4 + > > target/riscv/cpu.h | 1 + > > target/riscv/insn32.decode | 5 + > > target/riscv/insn_trans/trans_rvzacas.c.inc | 146 > > ++++++++++++++++++++ > > target/riscv/translate.c | 1 + > > 5 files changed, 157 insertions(+) > > create mode 100644 target/riscv/insn_trans/trans_rvzacas.c.inc > > diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c > > index db0875fb43..e99833eb4a 100644 > > --- a/target/riscv/cpu.c > > +++ b/target/riscv/cpu.c > > @@ -82,6 +82,7 @@ static const struct isa_ext_data isa_edata_arr[] > > = { > > ISA_EXT_DATA_ENTRY(zicsr, PRIV_VERSION_1_10_0, ext_icsr), > > ISA_EXT_DATA_ENTRY(zifencei, PRIV_VERSION_1_10_0, > > ext_ifencei), > > ISA_EXT_DATA_ENTRY(zihintpause, PRIV_VERSION_1_10_0, > > ext_zihintpause), > > + ISA_EXT_DATA_ENTRY(zacas, PRIV_VERSION_1_12_0, ext_zacas), > > ISA_EXT_DATA_ENTRY(zawrs, PRIV_VERSION_1_12_0, ext_zawrs), > > ISA_EXT_DATA_ENTRY(zfh, PRIV_VERSION_1_11_0, ext_zfh), > > ISA_EXT_DATA_ENTRY(zfhmin, PRIV_VERSION_1_11_0, ext_zfhmin), > > @@ -1604,6 +1605,9 @@ static Property riscv_cpu_extensions[] = { > > DEFINE_PROP_BOOL("x-zvfh", RISCVCPU, cfg.ext_zvfh, false), > > DEFINE_PROP_BOOL("x-zvfhmin", RISCVCPU, cfg.ext_zvfhmin, > > false), > > > > + /* Atomic CAS (Zacas) */ > > + DEFINE_PROP_BOOL("x-zacas", RISCVCPU, cfg.ext_zacas, false), > > + > > DEFINE_PROP_END_OF_LIST(), > > }; > > > > diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h > > index de7e43126a..ac4d9e8e9c 100644 > > --- a/target/riscv/cpu.h > > +++ b/target/riscv/cpu.h > > @@ -436,6 +436,7 @@ struct RISCVCPUConfig { > > bool ext_smaia; > > bool ext_ssaia; > > bool ext_sscofpmf; > > + bool ext_zacas; > > bool rvv_ta_all_1s; > > bool rvv_ma_all_1s; > > > > diff --git a/target/riscv/insn32.decode > > b/target/riscv/insn32.decode > > index 73d5d1b045..97d17ee520 100644 > > --- a/target/riscv/insn32.decode > > +++ b/target/riscv/insn32.decode > > @@ -255,6 +255,11 @@ amomax_d 10100 . . ..... ..... 011 ..... 
> > 0101111 @atom_st > > amominu_d 11000 . . ..... ..... 011 ..... 0101111 @atom_st > > amomaxu_d 11100 . . ..... ..... 011 ..... 0101111 @atom_st > > > > +# *** Zacas Extension > > +amocas_w 00101 . . ..... ..... 010 ..... 0101111 @atom_st > > +amocas_d 00101 . . ..... ..... 011 ..... 0101111 @atom_st > > +amocas_q 00101 . . ..... ..... 100 ..... 0101111 @atom_st > > + > > # *** RV32F Standard Extension *** > > flw ............ ..... 010 ..... 0000111 @i > > fsw ....... ..... ..... 010 ..... 0100111 @s > > diff --git a/target/riscv/insn_trans/trans_rvzacas.c.inc > > b/target/riscv/insn_trans/trans_rvzacas.c.inc > > new file mode 100644 > > index 0000000000..3f1b58ee8a > > --- /dev/null > > +++ b/target/riscv/insn_trans/trans_rvzacas.c.inc > > @@ -0,0 +1,146 @@ > > +/* > > + * RISC-V translation routines for Zacas extension > > + * > > + * Copyright (c) 2023 Rivos Inc > > + * > > + * This program is free software; you can redistribute it and/or > > modify it > > + * under the terms and conditions of the GNU General Public > > License, > > + * version 2 or later, as published by the Free Software > > Foundation. > > + * > > + * This program is distributed in the hope it will be useful, but > > WITHOUT > > + * ANY WARRANTY; without even the implied warranty of > > MERCHANTABILITY or > > + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public > > License for > > + * more details. > > + * > > + * You should have received a copy of the GNU General Public > > License along with > > + * this program. If not, see <http://www.gnu.org/licenses/>. 
> > + */ > > + > > +#define REQUIRE_ZACAS(ctx) do { \ > > + if (!ctx->cfg_ptr->ext_zacas) { \ > > + return false; \ > > + } \ > > +} while (0) > > + > > +static bool amocas_tl(DisasContext *ctx, arg_atomic *a, MemOp mop) > > +{ > > + TCGv retv = dest_gpr(ctx, a->rd); > > + TCGv addr = get_address(ctx, a->rs1, 0); > > + TCGv newv = get_gpr(ctx, a->rs2, EXT_ZERO); > > + TCGv cmpv = get_gpr(ctx, a->rd, EXT_ZERO); > > + > > + decode_save_opc(ctx); > > + tcg_gen_atomic_cmpxchg_tl(retv, addr, cmpv, newv, ctx- > > >mem_idx, mop); > > + gen_set_gpr(ctx, a->rd, retv); > > + > > + return true; > > +} > > + > > +#if TARGET_LONG_BITS == 32 > > +static bool trans_amocas_w(DisasContext *ctx, arg_amocas_w *a) > > +{ > > + REQUIRE_ZACAS(ctx); > > + return amocas_tl(ctx, a, MO_TESL | MO_ALIGN); > > +} > > +#else > > +static bool trans_amocas_w(DisasContext *ctx, arg_amocas_w *a) > > +{ > > + REQUIRE_64BIT(ctx); > > + REQUIRE_ZACAS(ctx); > > + ctx->ol = MXL_RV32; > > + return amocas_tl(ctx, a, MO_TESL | MO_ALIGN); > > +} > > +#endif > > This seems unnecessary to use different trans function for RV32 and > RV64. > > tcg_gen_atomic_cmpxchg_tl itself will do the actually cmp and xchg > operation based on size in memop > > instead of the target long size. Thank you - I can see from your version how this would work. > > > + > > +#if TARGET_LONG_BITS == 32 > We commonly use TARGET_RISCV32 instead of TARGET_LONG_BITS in similar > cases. Noted - thank you. Richard in another thread also pointed out my version didn't handle dynamic xlen correctly. > > +static bool trans_amocas_d(DisasContext *ctx, arg_amocas_w *a) > > +{ > > + TCGv_i64 retv, newv, cmpv; > > + TCGv_i32 cmpv_l, cmpv_h, newv_l, newv_h; > > + TCGv addr; > > + > > + REQUIRE_ZACAS(ctx); > > + > > + if (a->rd % 2 == 1 || a->rs2 % 2 == 1) { > > + return false; > > + } > > + > > + addr = get_address(ctx, a->rs1, 0); > > + cmpv_l = get_gpr(ctx, a->rd, 0); > It's better to use EXT_NONE instead of 0 in this and following cases. Noted! 
> > + cmpv_h = a->rd == 0 ? get_gpr(ctx, 0, 0) : get_gpr(ctx, a->rd > > + 1, 0); > We can use ctx->zero for get_gpr(ctx, 0, 0) directly here. Thanks! > > + cmpv = tcg_temp_new_i64(); > > + tcg_gen_concat_i32_i64(cmpv, cmpv_l, cmpv_h); > > + newv_l = get_gpr(ctx, a->rs2, 0); > > + newv_h = a->rs2 == 0 ? get_gpr(ctx, 0, 0) : get_gpr(ctx, a- > > >rs2 + 1, 0); > > + newv = tcg_temp_new_i64(); > > + tcg_gen_concat_i32_i64(newv, newv_l, newv_h); > > + retv = tcg_temp_new_i64(); > > + > > + decode_save_opc(ctx); > > + tcg_gen_atomic_cmpxchg_i64(retv, addr, cmpv, newv, ctx- > > >mem_idx, > > + MO_TESQ | MO_ALIGN); > > + > > + if (a->rd != 0) { > > + TCGv_i32 retv_l = tcg_temp_new_i32(); > > + TCGv_i32 retv_h = tcg_temp_new_i32(); > > + tcg_gen_extr_i64_i32(retv_l, retv_h, retv); > > + gen_set_gpr(ctx, a->rd, retv_l); > > + gen_set_gpr(ctx, a->rd + 1, retv_h); > > + } > > + > > + return true; > > +} > > +#else > > +static bool trans_amocas_d(DisasContext *ctx, arg_amocas_w *a) > > +{ > > + REQUIRE_64BIT(ctx); > This seems not correct. amocas_d can work on RV32, so I think it can > also work on RV64 with xl = 32 > > + REQUIRE_ZACAS(ctx); > > + return amocas_tl(ctx, a, MO_TESQ | MO_ALIGN); > > +} > > +#endif > > + > > +#if TARGET_LONG_BITS == 32 > > +static bool trans_amocas_q(DisasContext *ctx, arg_amocas_w *a) > > +{ > > + return false; > > +} > > This case is unnecessary. > > We can just add REQUIRE_64BIT(ctx) in following trans_amocas_q > Thank you for the review - this was my first TCG patch and i've learnt a lot from that. Cheers, Rob > > +#else > > +static bool trans_amocas_q(DisasContext *ctx, arg_amocas_w *a) > > +{ > > + TCGv_i128 retv, newv, cmpv; > > + TCGv_i64 cmpv_l, cmpv_h, newv_l, newv_h; > > + TCGv addr; > > + > > + REQUIRE_64BIT(ctx); > > + REQUIRE_ZACAS(ctx); > > + > > + if (a->rd % 2 == 1 || a->rs2 % 2 == 1) { > > + return false; > > + } > > + > > + addr = get_address(ctx, a->rs1, 0); > > + cmpv_l = get_gpr(ctx, a->rd, 0); > > + cmpv_h = a->rd == 0 ? 
get_gpr(ctx, 0, 0) : get_gpr(ctx, a->rd > > + 1, 0); > > + cmpv = tcg_temp_new_i128(); > > + tcg_gen_concat_i64_i128(cmpv, cmpv_l, cmpv_h); > > + newv_l = get_gpr(ctx, a->rs2, 0); > > + newv_h = a->rs2 == 0 ? get_gpr(ctx, 0, 0) : get_gpr(ctx, a- > > >rs2 + 1, 0); > > + newv = tcg_temp_new_i128(); > > + tcg_gen_concat_i64_i128(newv, newv_l, newv_h); > > + retv = tcg_temp_new_i128(); > > + > > + decode_save_opc(ctx); > > + tcg_gen_atomic_cmpxchg_i128(retv, addr, cmpv, newv, ctx- > > >mem_idx, > > + MO_TEUO | MO_ALIGN); > > + > > + if (a->rd != 0) { > > + TCGv_i64 retv_l = tcg_temp_new_i64(); > > + TCGv_i64 retv_h = tcg_temp_new_i64(); > > + tcg_gen_extr_i128_i64(retv_l, retv_h, retv); > > + gen_set_gpr(ctx, a->rd, retv_l); > > + gen_set_gpr(ctx, a->rd + 1, retv_h); > > + } > > + > > + return true; > > +} > > +#endif > > diff --git a/target/riscv/translate.c b/target/riscv/translate.c > > index 928da0d3f0..55438f5ebf 100644 > > --- a/target/riscv/translate.c > > +++ b/target/riscv/translate.c > > @@ -1074,6 +1074,7 @@ static uint32_t opcode_at(DisasContextBase > > *dcbase, target_ulong pc) > > #include "insn_trans/trans_rvv.c.inc" > > #include "insn_trans/trans_rvb.c.inc" > > #include "insn_trans/trans_rvzicond.c.inc" > > +#include "insn_trans/trans_rvzacas.c.inc" > > #include "insn_trans/trans_rvzawrs.c.inc" > > #include "insn_trans/trans_rvzicbo.c.inc" > > #include "insn_trans/trans_rvzfh.c.inc" >