VMM0 performs the VMULU accumulation and returns the low result, then feeds that result back into the multiplier state as an MTM0 with a zero high operand would: MPL[0] receives the low result and MPL[1] is set to zero. It also clears the partial products (P[]) and models the remaining, architecturally unpredictable multiplier lanes (MPL[2] through MPL[5]) as zero.
Add helper and translator support for this multiplier chain-update operation. Signed-off-by: James Hilliard <[email protected]> --- Changes v2 -> v3: - Split VMM0 out of the combined Octeon arithmetic and memory instruction patch. (requested by Richard Henderson) Changes v3 -> v4: - Keep the Octeon3 MTM0-style high-lane update and set MPL[3] to zero when feeding the low result back. Changes v5 -> v6: - Zero MPL1 and deterministic-zero the remaining modeled MTM0 lanes after checking the CN71XX VMM0 definition. --- target/mips/helper.h | 1 + target/mips/tcg/octeon.decode | 1 + target/mips/tcg/octeon_translate.c | 1 + target/mips/tcg/op_helper.c | 20 ++++++++++++++++++++ 4 files changed, 23 insertions(+) diff --git a/target/mips/helper.h b/target/mips/helper.h index f1e78ae329..46ccad95c3 100644 --- a/target/mips/helper.h +++ b/target/mips/helper.h @@ -25,6 +25,7 @@ DEF_HELPER_3(crc32, tl, tl, tl, i32) DEF_HELPER_3(crc32c, tl, tl, tl, i32) DEF_HELPER_FLAGS_4(rotx, TCG_CALL_NO_RWG_SE, tl, tl, i32, i32, i32) DEF_HELPER_3(octeon_vmulu, i64, env, i64, i64) +DEF_HELPER_3(octeon_vmm0, i64, env, i64, i64) /* microMIPS functions */ DEF_HELPER_4(lwm, void, env, tl, tl, i32) diff --git a/target/mips/tcg/octeon.decode b/target/mips/tcg/octeon.decode index 74d24c18de..c1c3506d20 100644 --- a/target/mips/tcg/octeon.decode +++ b/target/mips/tcg/octeon.decode @@ -51,6 +51,7 @@ MTP2 011100 rs:5 rt:5 00000 00000 001011 &r2 MTM1 011100 rs:5 rt:5 00000 00000 001100 &r2 MTM2 011100 rs:5 rt:5 00000 00000 001101 &r2 VMULU 011100 ..... ..... ..... 00000 001111 @r3 +VMM0 011100 ..... ..... ..... 00000 010000 @r3 &saa base rt @saa ...... base:5 rt:5 ................ 
&saa diff --git a/target/mips/tcg/octeon_translate.c b/target/mips/tcg/octeon_translate.c index 62cbbd3c82..304aa58065 100644 --- a/target/mips/tcg/octeon_translate.c +++ b/target/mips/tcg/octeon_translate.c @@ -317,3 +317,4 @@ TRANS(MTP0, trans_mtp, 0); TRANS(MTP1, trans_mtp, 1); TRANS(MTP2, trans_mtp, 2); TRANS(VMULU, trans_vmul, gen_helper_octeon_vmulu); +TRANS(VMM0, trans_vmul, gen_helper_octeon_vmm0); diff --git a/target/mips/tcg/op_helper.c b/target/mips/tcg/op_helper.c index ab3fb06a16..45e208ca43 100644 --- a/target/mips/tcg/op_helper.c +++ b/target/mips/tcg/op_helper.c @@ -176,6 +176,26 @@ uint64_t helper_octeon_vmulu(CPUMIPSState *env, uint64_t rs, uint64_t rt) return sum[0]; } +uint64_t helper_octeon_vmm0(CPUMIPSState *env, uint64_t rs, uint64_t rt) +{ + uint64_t lo = helper_octeon_vmulu(env, rs, rt); + + /* + * VMM0 is architecturally equivalent to VMULU followed by MTM0 with + * the low result and a zero high operand. + */ + env->active_tc.octeon.MPL[0] = lo; + env->active_tc.octeon.MPL[1] = 0; + env->active_tc.octeon.MPL[2] = 0; + env->active_tc.octeon.MPL[3] = 0; + env->active_tc.octeon.MPL[4] = 0; + env->active_tc.octeon.MPL[5] = 0; + for (int i = 0; i < ARRAY_SIZE(env->active_tc.octeon.P); i++) { + env->active_tc.octeon.P[i] = 0; + } + return lo; +} + /* these crc32 functions are based on target/loongarch/tcg/op_helper.c */ target_ulong helper_crc32(target_ulong val, target_ulong m, uint32_t sz) { -- 2.54.0
