VMM0 performs the VMULU accumulation, returns the low result, then feeds that result back into the MTM0 multiplier state with a zero high operand and clears partial products.
Add helper and translator support for this multiplier chain-update operation.

Signed-off-by: James Hilliard <[email protected]>
---
Changes v2 -> v3:
  - Split VMM0 out of the combined Octeon arithmetic and memory
    instruction patch. (requested by Richard Henderson)

Changes v3 -> v4:
  - Keep the Octeon3 MTM0-style high-lane update and set MPL[3] to zero
    when feeding the low result back.
---
 target/mips/helper.h               |  1 +
 target/mips/tcg/octeon.decode      |  1 +
 target/mips/tcg/octeon_translate.c |  1 +
 target/mips/tcg/op_helper.c        | 16 ++++++++++++++++
 4 files changed, 19 insertions(+)

diff --git a/target/mips/helper.h b/target/mips/helper.h
index f1e78ae329..46ccad95c3 100644
--- a/target/mips/helper.h
+++ b/target/mips/helper.h
@@ -25,6 +25,7 @@ DEF_HELPER_3(crc32, tl, tl, tl, i32)
 DEF_HELPER_3(crc32c, tl, tl, tl, i32)
 DEF_HELPER_FLAGS_4(rotx, TCG_CALL_NO_RWG_SE, tl, tl, i32, i32, i32)
 DEF_HELPER_3(octeon_vmulu, i64, env, i64, i64)
+DEF_HELPER_3(octeon_vmm0, i64, env, i64, i64)
 
 /* microMIPS functions */
 DEF_HELPER_4(lwm, void, env, tl, tl, i32)
diff --git a/target/mips/tcg/octeon.decode b/target/mips/tcg/octeon.decode
index 75834afc6c..c60af2d39a 100644
--- a/target/mips/tcg/octeon.decode
+++ b/target/mips/tcg/octeon.decode
@@ -51,6 +51,7 @@ MTP2 011100 rs:5 rt:5 00000 00000 001011 &r2
 MTM1 011100 rs:5 rt:5 00000 00000 001100 &r2
 MTM2 011100 rs:5 rt:5 00000 00000 001101 &r2
 VMULU 011100 ..... ..... ..... 00000 001111 @r3
+VMM0 011100 ..... ..... ..... 00000 010000 @r3
 
 &saa base rt
 @saa ...... base:5 rt:5 ................ &saa
diff --git a/target/mips/tcg/octeon_translate.c b/target/mips/tcg/octeon_translate.c
index d39f904e73..63e2719b1f 100644
--- a/target/mips/tcg/octeon_translate.c
+++ b/target/mips/tcg/octeon_translate.c
@@ -297,3 +297,4 @@ TRANS(MTP0, trans_mtp, 0);
 TRANS(MTP1, trans_mtp, 1);
 TRANS(MTP2, trans_mtp, 2);
 TRANS(VMULU, trans_vmul, gen_helper_octeon_vmulu);
+TRANS(VMM0, trans_vmul, gen_helper_octeon_vmm0);
diff --git a/target/mips/tcg/op_helper.c b/target/mips/tcg/op_helper.c
index ab3fb06a16..124855d4dc 100644
--- a/target/mips/tcg/op_helper.c
+++ b/target/mips/tcg/op_helper.c
@@ -176,6 +176,22 @@ uint64_t helper_octeon_vmulu(CPUMIPSState *env, uint64_t rs, uint64_t rt)
     return sum[0];
 }
 
+uint64_t helper_octeon_vmm0(CPUMIPSState *env, uint64_t rs, uint64_t rt)
+{
+    uint64_t lo = helper_octeon_vmulu(env, rs, rt);
+
+    /*
+     * Complete the VMULU accumulation, then apply the MTM0-style state
+     * update with the low result and a zero high operand.
+     */
+    env->active_tc.octeon.MPL[0] = lo;
+    env->active_tc.octeon.MPL[3] = 0;
+    for (int i = 0; i < ARRAY_SIZE(env->active_tc.octeon.P); i++) {
+        env->active_tc.octeon.P[i] = 0;
+    }
+    return lo;
+}
+
 /* these crc32 functions are based on target/loongarch/tcg/op_helper.c */
 target_ulong helper_crc32(target_ulong val, target_ulong m, uint32_t sz)
 {
-- 
2.54.0
