Add emulation for remaining ISV=0 load/store instruction classes.
Atomic memory operations (DDI 0487 C3.3.2):
- LDADD, LDCLR, LDEOR, LDSET: arithmetic/logic atomics
- LDSMAX, LDSMIN, LDUMAX, LDUMIN: signed/unsigned min/max
- SWP: atomic swap
Non-atomic read-modify-write, sufficient for MMIO where concurrent
access is not a concern. Acquire/release semantics are ignored.
Compare-and-swap (DDI 0487 C3.3.1):
- CAS/CASA/CASAL/CASL: single-register compare-and-swap
- CASP/CASPA/CASPAL/CASPL: register-pair compare-and-swap
CASP validates even register pairs; odd or r31 returns UNHANDLED.
Load with PAC (DDI 0487 C6.2.121):
- LDRAA/LDRAB: pointer-authenticated load, offset/pre-indexed
Pointer authentication is not emulated (equivalent to auth always
succeeding), which is correct for MMIO since PAC is a software
security mechanism, not a memory access semantic.
Decodetree differences from TCG:
- %ldra_imm extracts the raw S:imm9 field; the handler scales by
<< 3. TCG applies !function=times_8 in the formatter.
- @ldra uses wildcards for fixed opcode bits that TCG locks down
  (bits 31:30, bit 21, bit 10); the fixed bits are matched by the
  instruction pattern instead.
- @cas is an explicit format template; TCG uses inline field
extraction.
CASP uses two explicit decode patterns for the 32/64-bit size
variants. LDRA's offset immediate is stored raw in the decode;
the handler scales by << 3.
Signed-off-by: Lucas Amaral <[email protected]>
---
target/arm/emulate/a64-ldst.decode | 45 ++++++
target/arm/emulate/arm_emulate.c | 233 +++++++++++++++++++++++++++++
2 files changed, 278 insertions(+)
diff --git a/target/arm/emulate/a64-ldst.decode b/target/arm/emulate/a64-ldst.decode
index fadf6fd2..9292bfdf 100644
--- a/target/arm/emulate/a64-ldst.decode
+++ b/target/arm/emulate/a64-ldst.decode
@@ -16,6 +16,16 @@
# Load/store pair (GPR and SIMD/FP)
&ldstpair rt2 rt rn imm sz sign w p
+# Atomic memory operations
+&atomic rs rn rt a r sz
+
+# Compare-and-swap
+&cas rs rn rt sz a r
+
+# Load with PAC (LDRAA/LDRAB, FEAT_PAuth)
+%ldra_imm 22:s1 12:9
+&ldra rt rn imm m w
+
# Load/store register offset
&ldst rm rn rt sign ext sz opt s
@@ -36,6 +46,15 @@
# Load/store pair: imm7 is signed, scaled by element size in handler
@ldstpair .. ... . ... . imm:s7 rt2:5 rn:5 rt:5 &ldstpair
+# Atomics
+@atomic sz:2 ... . .. a:1 r:1 . rs:5 . ... .. rn:5 rt:5 &atomic
+
+# Compare-and-swap: sz extracted by pattern (CAS) or set constant (CASP)
+@cas .. ...... . a:1 . rs:5 r:1 ..... rn:5 rt:5 &cas
+
+# Load with PAC
+@ldra   .. ... . .. m:1 . . ......... w:1 . rn:5 rt:5 &ldra imm=%ldra_imm
+
# Load/store register offset
@ldst .. ... . .. .. . rm:5 opt:3 s:1 .. rn:5 rt:5 &ldst
@@ -241,6 +260,32 @@ STR_v 00 111 1 00 10 1 ..... ... . 10 ..... ..... @ldst sign=0 ext=
 LDR_v sz:2 111 1 00 01 1 ..... ... . 10 ..... ..... @ldst sign=0 ext=0
 LDR_v 00 111 1 00 11 1 ..... ... . 10 ..... ..... @ldst sign=0 ext=0 sz=4
+### Compare-and-swap
+
+# CAS / CASA / CASAL / CASL
+CAS sz:2 001000 1 . 1 ..... . 11111 ..... ..... @cas
+
+# CASP / CASPA / CASPAL / CASPL (pair: Rt,Rt+1 and Rs,Rs+1)
+CASP 00 001000 0 . 1 ..... . 11111 ..... ..... @cas sz=2
+CASP 01 001000 0 . 1 ..... . 11111 ..... ..... @cas sz=3
+
+### Atomic memory operations
+
+LDADD .. 111 0 00 . . 1 ..... 0000 00 ..... ..... @atomic
+LDCLR .. 111 0 00 . . 1 ..... 0001 00 ..... ..... @atomic
+LDEOR .. 111 0 00 . . 1 ..... 0010 00 ..... ..... @atomic
+LDSET .. 111 0 00 . . 1 ..... 0011 00 ..... ..... @atomic
+LDSMAX .. 111 0 00 . . 1 ..... 0100 00 ..... ..... @atomic
+LDSMIN .. 111 0 00 . . 1 ..... 0101 00 ..... ..... @atomic
+LDUMAX .. 111 0 00 . . 1 ..... 0110 00 ..... ..... @atomic
+LDUMIN .. 111 0 00 . . 1 ..... 0111 00 ..... ..... @atomic
+SWP .. 111 0 00 . . 1 ..... 1000 00 ..... ..... @atomic
+
+### Load with PAC (FEAT_PAuth)
+
+# LDRAA (M=0) / LDRAB (M=1), offset (W=0) / pre-indexed (W=1)
+LDRA 11 111 0 00 . . 1 ......... . 1 ..... ..... @ldra
+
### System instructions — DC cache maintenance
# SYS with CRn=C7 covers all data cache operations (DC CIVAC, CVAC, etc.).
diff --git a/target/arm/emulate/arm_emulate.c b/target/arm/emulate/arm_emulate.c
index 52e41703..44a559ad 100644
--- a/target/arm/emulate/arm_emulate.c
+++ b/target/arm/emulate/arm_emulate.c
@@ -499,6 +499,239 @@ static bool trans_LDXP(DisasContext *ctx, arg_stxr *a)
return true;
}
+/*
+ * Atomic memory operations (DDI 0487 C3.3.2)
+ *
+ * Non-atomic read-modify-write; sufficient for MMIO.
+ * Acquire/release semantics ignored (sequentially consistent by design).
+ */
+
+typedef uint64_t (*atomic_op_fn)(uint64_t old, uint64_t operand, int bits);
+
+static uint64_t atomic_add(uint64_t old, uint64_t op, int bits)
+{
+ (void)bits;
+ return old + op;
+}
+
+static uint64_t atomic_clr(uint64_t old, uint64_t op, int bits)
+{
+ (void)bits;
+ return old & ~op;
+}
+
+static uint64_t atomic_eor(uint64_t old, uint64_t op, int bits)
+{
+ (void)bits;
+ return old ^ op;
+}
+
+static uint64_t atomic_set(uint64_t old, uint64_t op, int bits)
+{
+ (void)bits;
+ return old | op;
+}
+
+static uint64_t atomic_smax(uint64_t old, uint64_t op, int bits)
+{
+ int64_t a = sign_extend(old, bits);
+ int64_t b = sign_extend(op, bits);
+ return (a >= b) ? old : op;
+}
+
+static uint64_t atomic_smin(uint64_t old, uint64_t op, int bits)
+{
+ int64_t a = sign_extend(old, bits);
+ int64_t b = sign_extend(op, bits);
+ return (a <= b) ? old : op;
+}
+
+static uint64_t atomic_umax(uint64_t old, uint64_t op, int bits)
+{
+ uint64_t mask = (bits == 64) ? UINT64_MAX : (1ULL << bits) - 1;
+ return ((old & mask) >= (op & mask)) ? old : op;
+}
+
+static uint64_t atomic_umin(uint64_t old, uint64_t op, int bits)
+{
+ uint64_t mask = (bits == 64) ? UINT64_MAX : (1ULL << bits) - 1;
+ return ((old & mask) <= (op & mask)) ? old : op;
+}
+
+static bool do_atomic(DisasContext *ctx, arg_atomic *a, atomic_op_fn fn)
+{
+ int esize = 1 << a->sz;
+ int bits = 8 * esize;
+ uint64_t va = base_read(ctx, a->rn);
+ uint64_t old = 0;
+
+ if (mem_read(ctx, va, &old, esize) != 0) {
+ return true;
+ }
+
+ uint64_t operand = gpr_read(ctx, a->rs);
+ uint64_t result = fn(old, operand, bits);
+
+ if (mem_write(ctx, va, &result, esize) != 0) {
+ return true;
+ }
+
+ /* Rt receives the old value (before modification) */
+ gpr_write(ctx, a->rt, old);
+ return true;
+}
+
+static bool trans_LDADD(DisasContext *ctx, arg_atomic *a)
+{
+ return do_atomic(ctx, a, atomic_add);
+}
+
+static bool trans_LDCLR(DisasContext *ctx, arg_atomic *a)
+{
+ return do_atomic(ctx, a, atomic_clr);
+}
+
+static bool trans_LDEOR(DisasContext *ctx, arg_atomic *a)
+{
+ return do_atomic(ctx, a, atomic_eor);
+}
+
+static bool trans_LDSET(DisasContext *ctx, arg_atomic *a)
+{
+ return do_atomic(ctx, a, atomic_set);
+}
+
+static bool trans_LDSMAX(DisasContext *ctx, arg_atomic *a)
+{
+ return do_atomic(ctx, a, atomic_smax);
+}
+
+static bool trans_LDSMIN(DisasContext *ctx, arg_atomic *a)
+{
+ return do_atomic(ctx, a, atomic_smin);
+}
+
+static bool trans_LDUMAX(DisasContext *ctx, arg_atomic *a)
+{
+ return do_atomic(ctx, a, atomic_umax);
+}
+
+static bool trans_LDUMIN(DisasContext *ctx, arg_atomic *a)
+{
+ return do_atomic(ctx, a, atomic_umin);
+}
+
+static bool trans_SWP(DisasContext *ctx, arg_atomic *a)
+{
+ int esize = 1 << a->sz;
+ uint64_t va = base_read(ctx, a->rn);
+ uint64_t old = 0;
+
+ if (mem_read(ctx, va, &old, esize) != 0) {
+ return true;
+ }
+
+ uint64_t newval = gpr_read(ctx, a->rs);
+ if (mem_write(ctx, va, &newval, esize) != 0) {
+ return true;
+ }
+
+ gpr_write(ctx, a->rt, old);
+ return true;
+}
+
+/* Compare-and-swap: CAS, CASP (DDI 0487 C3.3.1) */
+
+static bool trans_CAS(DisasContext *ctx, arg_cas *a)
+{
+ int esize = 1 << a->sz;
+ uint64_t va = base_read(ctx, a->rn);
+ uint64_t current = 0;
+
+    if (mem_read(ctx, va, &current, esize) != 0) {
+ return true;
+ }
+
+ uint64_t mask = (esize == 8) ? UINT64_MAX : (1ULL << (8 * esize)) - 1;
+ uint64_t compare = gpr_read(ctx, a->rs) & mask;
+
+ if ((current & mask) == compare) {
+ uint64_t newval = gpr_read(ctx, a->rt) & mask;
+ if (mem_write(ctx, va, &newval, esize) != 0) {
+ return true;
+ }
+ }
+
+ /* Rs receives the old memory value (whether or not swap occurred) */
+ gpr_write(ctx, a->rs, current);
+ return true;
+}
+
+/* CASP: compare-and-swap pair (Rs,Rs+1 compared; Rt,Rt+1 stored) */
+static bool trans_CASP(DisasContext *ctx, arg_cas *a)
+{
+ /* CASP requires even register pairs; odd or r31 is UNPREDICTABLE */
+ if ((a->rs & 1) || a->rs >= 31 || (a->rt & 1) || a->rt >= 31) {
+ return false;
+ }
+
+ int esize = 1 << a->sz; /* per-register size */
+ uint64_t va = base_read(ctx, a->rn);
+ uint8_t buf[16];
+ uint64_t cur1 = 0, cur2 = 0;
+
+ if (mem_read(ctx, va, buf, 2 * esize) != 0) {
+ return true;
+ }
+ memcpy(&cur1, buf, esize);
+ memcpy(&cur2, buf + esize, esize);
+
+ uint64_t mask = (esize == 8) ? UINT64_MAX : (1ULL << (8 * esize)) - 1;
+ uint64_t cmp1 = gpr_read(ctx, a->rs) & mask;
+ uint64_t cmp2 = gpr_read(ctx, a->rs + 1) & mask;
+
+ if ((cur1 & mask) == cmp1 && (cur2 & mask) == cmp2) {
+ uint64_t new1 = gpr_read(ctx, a->rt) & mask;
+ uint64_t new2 = gpr_read(ctx, a->rt + 1) & mask;
+ memcpy(buf, &new1, esize);
+ memcpy(buf + esize, &new2, esize);
+ if (mem_write(ctx, va, buf, 2 * esize) != 0) {
+ return true;
+ }
+ }
+
+ gpr_write(ctx, a->rs, cur1);
+ gpr_write(ctx, a->rs + 1, cur2);
+ return true;
+}
+
+/*
+ * Load with PAC: LDRAA / LDRAB (FEAT_PAuth)
+ * (DDI 0487 C6.2.121)
+ *
+ * Pointer authentication is not emulated -- the base register is used
+ * directly (equivalent to auth always succeeding).
+ */
+
+static bool trans_LDRA(DisasContext *ctx, arg_ldra *a)
+{
+ int64_t offset = (int64_t)a->imm << 3; /* S:imm9, scaled by 8 */
+ uint64_t base = base_read(ctx, a->rn);
+ uint64_t va = base + offset; /* auth not emulated */
+ uint64_t val = 0;
+
+ if (mem_read(ctx, va, &val, 8) != 0) {
+ return true;
+ }
+
+ gpr_write(ctx, a->rt, val);
+
+ if (a->w) {
+ base_write(ctx, a->rn, va);
+ }
+ return true;
+}
+
/* PRFM, DC cache maintenance -- treated as NOP */
static bool trans_NOP(DisasContext *ctx, arg_NOP *a)
{
--
2.52.0