Add emulation for load/store pair instructions (DDI 0487 C3.3.14 -- C3.3.16). All addressing modes are covered: non-temporal (STNP/LDNP), post-indexed, signed offset, and pre-indexed.
Instruction coverage: - STP/LDP (GPR): 32/64-bit pairs, all addressing modes - STP/LDP (SIMD/FP): 32/64/128-bit pairs, all addressing modes - LDPSW: sign-extending 32-bit pair load - STGP: store allocation tag pair (tag operation is NOP for MMIO) Signed-off-by: Lucas Amaral <[email protected]> --- target/arm/emulate/a64-ldst.decode | 68 ++++++++++++++++++ target/arm/emulate/arm_emulate.c | 111 +++++++++++++++++++++++++++++ 2 files changed, 179 insertions(+) diff --git a/target/arm/emulate/a64-ldst.decode b/target/arm/emulate/a64-ldst.decode index af6babe1..f3de3f86 100644 --- a/target/arm/emulate/a64-ldst.decode +++ b/target/arm/emulate/a64-ldst.decode @@ -10,6 +10,9 @@ # 'u' flag: 0 = 9-bit signed immediate (byte offset), 1 = 12-bit unsigned (needs << sz) &ldst_imm rt rn imm sz sign w p unpriv ext u +# Load/store pair (GPR and SIMD/FP) +&ldstpair rt2 rt rn imm sz sign w p + # Load/store register offset &ldst rm rn rt sign ext sz opt s @@ -24,6 +27,9 @@ # Load/store unsigned offset (12-bit, handler scales by << sz) @ldst_uimm .. ... . .. .. imm:12 rn:5 rt:5 &ldst_imm u=1 unpriv=0 p=0 w=0 +# Load/store pair: imm7 is signed, scaled by element size in handler +@ldstpair .. ... . ... . imm:s7 rt2:5 rn:5 rt:5 &ldstpair + # Load/store register offset @ldst .. ... . .. .. . rm:5 opt:3 s:1 .. rn:5 rt:5 &ldst @@ -128,6 +134,68 @@ STR_v_i 00 111 1 01 10 ............ ..... ..... @ldst_uimm sign= LDR_v_i sz:2 111 1 01 01 ............ ..... ..... @ldst_uimm sign=0 ext=0 LDR_v_i 00 111 1 01 11 ............ ..... ..... @ldst_uimm sign=0 ext=0 sz=4 +### Load/store pair — non-temporal (STNP/LDNP) + +# STNP/LDNP: offset only, no writeback. Non-temporal hint ignored. +STP 00 101 0 000 0 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=0 +LDP 00 101 0 000 1 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=0 +STP 10 101 0 000 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=0 +LDP 10 101 0 000 1 ....... ..... ..... ..... 
@ldstpair sz=3 sign=0 p=0 w=0 +STP_v 00 101 1 000 0 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=0 +LDP_v 00 101 1 000 1 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=0 +STP_v 01 101 1 000 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=0 +LDP_v 01 101 1 000 1 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=0 +STP_v 10 101 1 000 0 ....... ..... ..... ..... @ldstpair sz=4 sign=0 p=0 w=0 +LDP_v 10 101 1 000 1 ....... ..... ..... ..... @ldstpair sz=4 sign=0 p=0 w=0 + +### Load/store pair — post-indexed + +STP 00 101 0 001 0 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=1 w=1 +LDP 00 101 0 001 1 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=1 w=1 +LDP 01 101 0 001 1 ....... ..... ..... ..... @ldstpair sz=2 sign=1 p=1 w=1 +STP 10 101 0 001 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=1 w=1 +LDP 10 101 0 001 1 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=1 w=1 +STP_v 00 101 1 001 0 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=1 w=1 +LDP_v 00 101 1 001 1 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=1 w=1 +STP_v 01 101 1 001 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=1 w=1 +LDP_v 01 101 1 001 1 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=1 w=1 +STP_v 10 101 1 001 0 ....... ..... ..... ..... @ldstpair sz=4 sign=0 p=1 w=1 +LDP_v 10 101 1 001 1 ....... ..... ..... ..... @ldstpair sz=4 sign=0 p=1 w=1 + +### Load/store pair — signed offset + +STP 00 101 0 010 0 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=0 +LDP 00 101 0 010 1 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=0 +LDP 01 101 0 010 1 ....... ..... ..... ..... @ldstpair sz=2 sign=1 p=0 w=0 +STP 10 101 0 010 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=0 +LDP 10 101 0 010 1 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=0 +STP_v 00 101 1 010 0 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=0 +LDP_v 00 101 1 010 1 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=0 +STP_v 01 101 1 010 0 ....... ..... 
..... ..... @ldstpair sz=3 sign=0 p=0 w=0 +LDP_v 01 101 1 010 1 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=0 +STP_v 10 101 1 010 0 ....... ..... ..... ..... @ldstpair sz=4 sign=0 p=0 w=0 +LDP_v 10 101 1 010 1 ....... ..... ..... ..... @ldstpair sz=4 sign=0 p=0 w=0 + +### Load/store pair — pre-indexed + +STP 00 101 0 011 0 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=1 +LDP 00 101 0 011 1 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=1 +LDP 01 101 0 011 1 ....... ..... ..... ..... @ldstpair sz=2 sign=1 p=0 w=1 +STP 10 101 0 011 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=1 +LDP 10 101 0 011 1 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=1 +STP_v 00 101 1 011 0 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=1 +LDP_v 00 101 1 011 1 ....... ..... ..... ..... @ldstpair sz=2 sign=0 p=0 w=1 +STP_v 01 101 1 011 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=1 +LDP_v 01 101 1 011 1 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=1 +STP_v 10 101 1 011 0 ....... ..... ..... ..... @ldstpair sz=4 sign=0 p=0 w=1 +LDP_v 10 101 1 011 1 ....... ..... ..... ..... @ldstpair sz=4 sign=0 p=0 w=1 + +### Load/store pair — STGP (store allocation tag + pair) + +STGP 01 101 0 001 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=1 w=1 +STGP 01 101 0 010 0 ....... ..... ..... ..... @ldstpair sz=3 sign=0 p=0 w=0 +STGP 01 101 0 011 0 ....... ..... ..... ..... 
@ldstpair sz=3 sign=0 p=0 w=1
+
 ### Load/store register — register offset
 
 # GPR
diff --git a/target/arm/emulate/arm_emulate.c b/target/arm/emulate/arm_emulate.c
index bf09e2a6..6c63a0d0 100644
--- a/target/arm/emulate/arm_emulate.c
+++ b/target/arm/emulate/arm_emulate.c
@@ -122,6 +122,144 @@ static uint64_t load_extend(uint64_t val, int sz, int sign, int ext)
     return val;
 }
 
+/*
+ * Load/store pair: STP, LDP, STNP, LDNP, STGP, LDPSW
+ * (DDI 0487 C3.3.14 -- C3.3.16)
+ */
+
+/*
+ * Store GPRs rt and rt2 as two consecutive (1 << a->sz)-byte elements.
+ * 'offset' is the already-scaled byte offset, supplied by the caller
+ * because STP scales imm7 by the element size while STGP scales it by the
+ * 16-byte tag granule.  With p set the access uses the unmodified base
+ * (post-index); with w set, base + offset is written back, but only when
+ * mem_write() returned 0.
+ */
+static bool ldstpair_store_gpr(DisasContext *ctx, arg_ldstpair *a,
+                               int64_t offset)
+{
+    int esize = 1 << a->sz;         /* 4 or 8 bytes */
+    uint64_t base = base_read(ctx, a->rn);
+    uint64_t va = a->p ? base : base + offset;
+    uint8_t buf[16];                /* max 2 x 8 bytes */
+
+    uint64_t v1 = gpr_read(ctx, a->rt);
+    uint64_t v2 = gpr_read(ctx, a->rt2);
+    memcpy(buf, &v1, esize);
+    memcpy(buf + esize, &v2, esize);
+
+    if (mem_write(ctx, va, buf, 2 * esize) != 0) {
+        return true;
+    }
+
+    if (a->w) {
+        base_write(ctx, a->rn, base + offset);
+    }
+    return true;
+}
+
+/* STP/STNP (GPR): imm7 is scaled by the element size. */
+static bool trans_STP(DisasContext *ctx, arg_ldstpair *a)
+{
+    return ldstpair_store_gpr(ctx, a, (int64_t)a->imm << a->sz);
+}
+
+/*
+ * LDP/LDNP/LDPSW (GPR): load two consecutive (1 << a->sz)-byte elements
+ * into rt and rt2.  When mem_read() returns non-zero, neither the
+ * destination registers nor the base register are written.
+ */
+static bool trans_LDP(DisasContext *ctx, arg_ldstpair *a)
+{
+    int esize = 1 << a->sz;
+    int64_t offset = (int64_t)a->imm << a->sz;
+    uint64_t base = base_read(ctx, a->rn);
+    uint64_t va = a->p ? base : base + offset;
+    uint8_t buf[16];
+    uint64_t v1 = 0, v2 = 0;
+
+    if (mem_read(ctx, va, buf, 2 * esize) != 0) {
+        return true;
+    }
+    memcpy(&v1, buf, esize);
+    memcpy(&v2, buf + esize, esize);
+
+    /* LDPSW: sign-extend 32-bit values to 64-bit (sign=1, sz=2) */
+    if (a->sign) {
+        v1 = sign_extend(v1, 8 * esize);
+        v2 = sign_extend(v2, 8 * esize);
+    }
+
+    gpr_write(ctx, a->rt, v1);
+    gpr_write(ctx, a->rt2, v2);
+
+    if (a->w) {
+        base_write(ctx, a->rn, base + offset);
+    }
+    return true;
+}
+
+/*
+ * STGP: the tag operation is a NOP for emulation; only the register pair
+ * is stored.  Unlike STP, imm7 is scaled by the 16-byte tag granule
+ * rather than by the 8-byte element size.
+ */
+static bool trans_STGP(DisasContext *ctx, arg_ldstpair *a)
+{
+    const int log2_tag_granule = 4; /* tag granule is 16 bytes */
+
+    return ldstpair_store_gpr(ctx, a, (int64_t)a->imm << log2_tag_granule);
+}
+
+/*
+ * SIMD/FP load/store pair: STP_v, LDP_v
+ * (DDI 0487 C3.3.14 -- C3.3.16)
+ */
+
+/* STP/STNP (SIMD/FP): store rt and rt2 as two (1 << a->sz)-byte elements. */
+static bool trans_STP_v(DisasContext *ctx, arg_ldstpair *a)
+{
+    int esize = 1 << a->sz; /* 4, 8, or 16 bytes */
+    int64_t offset = (int64_t)a->imm << a->sz;
+    uint64_t base = base_read(ctx, a->rn);
+    uint64_t va = a->p ? base : base + offset;
+    uint8_t buf[32]; /* max 2 x 16 bytes */
+
+    fpreg_read(ctx, a->rt, buf, esize);
+    fpreg_read(ctx, a->rt2, buf + esize, esize);
+
+    if (mem_write(ctx, va, buf, 2 * esize) != 0) {
+        return true;
+    }
+
+    if (a->w) {
+        base_write(ctx, a->rn, base + offset);
+    }
+    return true;
+}
+
+/* LDP/LDNP (SIMD/FP): load two (1 << a->sz)-byte elements into rt and rt2. */
+static bool trans_LDP_v(DisasContext *ctx, arg_ldstpair *a)
+{
+    int esize = 1 << a->sz;
+    int64_t offset = (int64_t)a->imm << a->sz;
+    uint64_t base = base_read(ctx, a->rn);
+    uint64_t va = a->p ? base : base + offset;
+    uint8_t buf[32];
+
+    if (mem_read(ctx, va, buf, 2 * esize) != 0) {
+        return true;
+    }
+
+    fpreg_write(ctx, a->rt, buf, esize);
+    fpreg_write(ctx, a->rt2, buf + esize, esize);
+
+    if (a->w) {
+        base_write(ctx, a->rn, base + offset);
+    }
+    return true;
+}
+
 /* Load/store single -- immediate (GPR) (DDI 0487 C3.3.8 -- C3.3.13) */
 
 static bool trans_STR_i(DisasContext *ctx, arg_ldst_imm *a)
-- 
2.52.0
