On 1/1/26 19:14, frederic.petrot--- via wrote:
The lq and sq helpers for the experimental rv128 architecture currently
use direct memory accesses.
Replace these direct accesses with the standard tcg_gen_qemu_{ld,st}_i128
TCG helpers that handle endianness issues.

Reported-by: Philippe Mathieu-Daudé <[email protected]>
Suggested-by: Richard Henderson <[email protected]>
Signed-off-by: Frédéric Pétrot <[email protected]>
---
  target/riscv/insn_trans/trans_rvi.c.inc | 32 ++++++++++++++++++-------
  1 file changed, 24 insertions(+), 8 deletions(-)

diff --git a/target/riscv/insn_trans/trans_rvi.c.inc 
b/target/riscv/insn_trans/trans_rvi.c.inc
index 54b9b4f241..2c82ae41a7 100644
--- a/target/riscv/insn_trans/trans_rvi.c.inc
+++ b/target/riscv/insn_trans/trans_rvi.c.inc
@@ -377,6 +377,9 @@ static bool gen_load_i128(DisasContext *ctx, arg_lb *a, 
MemOp memop)
      TCGv destl = dest_gpr(ctx, a->rd);
      TCGv desth = dest_gprh(ctx, a->rd);
      TCGv addrl = tcg_temp_new();
+    TCGv_i128 t16 = tcg_temp_new_i128();
+    TCGv_i64 tl = tcg_temp_new_i64();
+    TCGv_i64 th = tcg_temp_new_i64();

      tcg_gen_addi_tl(addrl, src1l, a->imm);

@@ -388,10 +391,14 @@ static bool gen_load_i128(DisasContext *ctx, arg_lb *a, MemOp memop)
              tcg_gen_movi_tl(desth, 0);
          }
      } else {
-        /* assume little-endian memory access for now */
-        tcg_gen_qemu_ld_tl(destl, addrl, ctx->mem_idx, MO_TEUQ);
-        tcg_gen_addi_tl(addrl, addrl, 8);
-        tcg_gen_qemu_ld_tl(desth, addrl, ctx->mem_idx, MO_TEUQ);
+        tcg_gen_qemu_ld_i128(t16, addrl, ctx->mem_idx, memop);
+        if (mo_endian(ctx) == MO_LE) {
+            tcg_gen_extr_i128_i64(tl, th, t16);
+        } else {
+            tcg_gen_extr_i128_i64(th, tl, t16);
+        }
+        tcg_gen_trunc_i64_tl(destl, tl);
+        tcg_gen_trunc_i64_tl(desth, th);

I'd rather have avoided the temps / truncate, but I suppose
we are restricted by the GPRs being declared as target_ulong,
so this code wouldn't build for rv32 (although it isn't
reachable there), and your code is almost a simple copy on rv64.

  bool le = mo_endian(ctx) == MO_LE;
  TCGv destl = (le ? dest_gpr : dest_gprh)(ctx, a->rd);
  TCGv desth = (le ? dest_gprh : dest_gpr)(ctx, a->rd);
  ...
  tcg_gen_extr_i128_i64(destl, desth, t16);

      }

      gen_set_gpr128(ctx, a->rd, destl, desth);
@@ -488,16 +495,25 @@ static bool gen_store_i128(DisasContext *ctx, arg_sb *a, 
MemOp memop)
      TCGv src2l = get_gpr(ctx, a->rs2, EXT_NONE);
      TCGv src2h = get_gprh(ctx, a->rs2);
      TCGv addrl = tcg_temp_new();
+    TCGv_i128 t16 = tcg_temp_new_i128();
+    TCGv_i64 tl = tcg_temp_new_i64();
+    TCGv_i64 th = tcg_temp_new_i64();

      tcg_gen_addi_tl(addrl, src1l, a->imm);

      if ((memop & MO_SIZE) <= MO_64) {
          tcg_gen_qemu_st_tl(src2l, addrl, ctx->mem_idx, memop);
      } else {
-        /* little-endian memory access assumed for now */
-        tcg_gen_qemu_st_tl(src2l, addrl, ctx->mem_idx, MO_TEUQ);
-        tcg_gen_addi_tl(addrl, addrl, 8);
-        tcg_gen_qemu_st_tl(src2h, addrl, ctx->mem_idx, MO_TEUQ);
+
+        tcg_gen_ext_tl_i64(tl, src2l);
+        tcg_gen_ext_tl_i64(th, src2h);
+
+        if (mo_endian(ctx) == MO_LE) {
+            tcg_gen_concat_i64_i128(t16, tl, th);

(Ditto)

+        } else {
+            tcg_gen_concat_i64_i128(t16, th, tl);
+        }
+        tcg_gen_qemu_st_i128(t16, addrl, ctx->mem_idx, memop);
      }
      return true;
  }

This approach is good enough for me for now:
Reviewed-by: Philippe Mathieu-Daudé <[email protected]>

Thanks!

Reply via email to