From: James Hilliard <[email protected]>

VMULU multiplies rs by the active Octeon multiplier state (MPL1:MPL0), adds rt and the queued partial products (P[1]:P[0]), returns the low 64 bits of the result in rd, and stores the remaining upper limbs, including carries, back into P[0]/P[1].
Expand the two-limb accumulator operation inline with TCG so the result and partial-product state stay visible to the optimizer. Add a mips64/mips64el linux-user TCG smoke test for representative Octeon multiplier instruction paths. Include hardware-backed regression coverage for MTP0 P1 zeroing. Signed-off-by: James Hilliard <[email protected]> Signed-off-by: Richard Henderson <[email protected]> Tested-by: Philippe Mathieu-Daudé <[email protected]> Signed-off-by: Philippe Mathieu-Daudé <[email protected]> Message-Id: <[email protected]> --- target/mips/tcg/octeon.decode | 2 + target/mips/tcg/octeon_translate.c | 35 +++++++++++++ tests/tcg/mips/user/isa/octeon/octeon-insns.c | 49 +++++++++++++++++++ 3 files changed, 86 insertions(+) diff --git a/target/mips/tcg/octeon.decode b/target/mips/tcg/octeon.decode index bb0a9f1d99a..36ced0bb333 100644 --- a/target/mips/tcg/octeon.decode +++ b/target/mips/tcg/octeon.decode @@ -55,6 +55,8 @@ MTP0 011100 ..... ..... 00000 00000 001001 @r2 MTP1 011100 ..... ..... 00000 00000 001010 @r2 MTP2 011100 ..... ..... 00000 00000 001011 @r2 +VMULU 011100 ..... ..... ..... 00000 001111 @r3 + &saa base rt @saa ...... base:5 rt:5 ................ &saa SAA 011100 ..... ..... 
00000 00000 011000 @saa diff --git a/target/mips/tcg/octeon_translate.c b/target/mips/tcg/octeon_translate.c index 10c7d18ad4f..a21f96b5c2a 100644 --- a/target/mips/tcg/octeon_translate.c +++ b/target/mips/tcg/octeon_translate.c @@ -264,3 +264,38 @@ static bool trans_mtp(DisasContext *ctx, arg_r2 *a, unsigned int index) TRANS(MTP0, trans_mtp, 0); TRANS(MTP1, trans_mtp, 1); TRANS(MTP2, trans_mtp, 2); + +static bool trans_VMULU(DisasContext *ctx, arg_VMULU *a) +{ + TCGv_i64 x[3], y[3], z[3]; + TCGv_i64 tmp = tcg_temp_new_i64(); + TCGv_i64 zero = tcg_constant_i64(0); + + z[0] = y[0] = tcg_temp_new_i64(); + z[1] = y[1] = tcg_temp_new_i64(); + z[2] = y[2] = tcg_temp_new_i64(); + x[0] = tcg_temp_new_i64(); + x[1] = tcg_temp_new_i64(); + x[2] = zero; + + /* Z = rs * (mpl1 : mpl0) + rt */ + gen_load_gpr(tmp, a->rs); + gen_load_gpr(y[0], a->rt); + tcg_gen_mulu2_i64(x[0], x[1], tmp, oct_mpl[0]); + tcg_gen_mulu2_i64(y[1], y[2], tmp, oct_mpl[1]); + tcg_gen_addN_i64(3, z, y, x); + + /* X == (0 : p1 : p0) */ + x[0] = oct_p[0]; + x[1] = oct_p[1]; + + /* Y == (p1 : p0 : tmp) */ + y[0] = tmp; + y[1] = oct_p[0]; + y[2] = oct_p[1]; + + /* (p1 : p0 : rd) = Z + (0 : p1 : p0) */ + tcg_gen_addN_i64(3, y, z, x); + gen_store_gpr(tmp, a->rd); + return true; +} diff --git a/tests/tcg/mips/user/isa/octeon/octeon-insns.c b/tests/tcg/mips/user/isa/octeon/octeon-insns.c index 3c3802ebfee..4647e47f371 100644 --- a/tests/tcg/mips/user/isa/octeon/octeon-insns.c +++ b/tests/tcg/mips/user/isa/octeon/octeon-insns.c @@ -86,6 +86,53 @@ static uint64_t octeon_sne(uint64_t rs, uint64_t rt) return rd; } +static uint64_t octeon_vmulu(uint64_t mpl0, uint64_t rs, uint64_t rt) +{ + uint64_t rd; + + asm volatile( + "move $8, %[mpl0]\n\t" + "move $9, $0\n\t" + ".word 0x71090008\n\t" /* mtm0 $8, $9 */ + "move $8, %[rs]\n\t" + "move $9, %[rt]\n\t" + ".word 0x7109500f\n\t" /* vmulu $10, $8, $9 */ + "move %[rd], $10\n\t" + : [rd] "=r" (rd) + : [mpl0] "r" (mpl0), [rs] "r" (rs), [rt] "r" (rt) + : "$8", "$9", "$10"); + 
+ return rd; +} + +static uint64_t octeon_mtp0_zeroes_p1(void) +{ + uint64_t rd; + + asm volatile( + "move $8, %[mpl0]\n\t" + "move $9, $0\n\t" + ".word 0x71090008\n\t" /* mtm0 $8, $9 */ + "move $8, %[p1]\n\t" + "move $9, $0\n\t" + ".word 0x7109000a\n\t" /* mtp1 $8, $9 */ + "move $8, $0\n\t" + "move $9, $0\n\t" + ".word 0x71090009\n\t" /* mtp0 $8, $9 */ + "move $8, $0\n\t" + "move $9, $0\n\t" + ".word 0x7109500f\n\t" /* vmulu $10, $8, $9 */ + "move $8, $0\n\t" + "move $9, $0\n\t" + ".word 0x7109500f\n\t" /* vmulu $10, $8, $9 */ + "move %[rd], $10\n\t" + : [rd] "=r" (rd) + : [mpl0] "r" (0ULL), [p1] "r" (1ULL) + : "$8", "$9", "$10"); + + return rd; +} + int main(void) { assert(octeon_baddu(0x123, 0x0f0) == 0x13); @@ -95,6 +142,8 @@ int main(void) assert(octeon_seq(0xabc, 0xdef) == 0); assert(octeon_sne(0xabc, 0xabc) == 0); assert(octeon_sne(0xabc, 0xdef) == 1); + assert(octeon_vmulu(5, 7, 11) == 46); + assert(octeon_mtp0_zeroes_p1() == 0); return 0; } -- 2.53.0
