From: James Hilliard <[email protected]> VMM0 multiplies MPL[0] by rs, adds rt and the queued P[0] partial product, returns the low result, and feeds that result back into MPL[0]. It sets MPL[1] to zero and clears partial products.
Include hardware-backed regression coverage for VMM0 MPL1 zeroing. Reviewed-by: Richard Henderson <[email protected]> Signed-off-by: James Hilliard <[email protected]> Signed-off-by: Richard Henderson <[email protected]> Tested-by: Philippe Mathieu-Daudé <[email protected]> Reviewed-by: Philippe Mathieu-Daudé <[email protected]> Signed-off-by: Philippe Mathieu-Daudé <[email protected]> --- target/mips/tcg/octeon.decode | 1 + target/mips/tcg/octeon_translate.c | 16 ++++++ tests/tcg/mips/user/isa/octeon/octeon-insns.c | 55 +++++++++++++++++++ 3 files changed, 72 insertions(+) diff --git a/target/mips/tcg/octeon.decode b/target/mips/tcg/octeon.decode index 36ced0bb333..f9c32e1dee1 100644 --- a/target/mips/tcg/octeon.decode +++ b/target/mips/tcg/octeon.decode @@ -56,6 +56,7 @@ MTP1 011100 ..... ..... 00000 00000 001010 @r2 MTP2 011100 ..... ..... 00000 00000 001011 @r2 VMULU 011100 ..... ..... ..... 00000 001111 @r3 +VMM0 011100 ..... ..... ..... 00000 010000 @r3 &saa base rt @saa ...... base:5 rt:5 ................ &saa diff --git a/target/mips/tcg/octeon_translate.c b/target/mips/tcg/octeon_translate.c index a21f96b5c2a..94f3cf9edd8 100644 --- a/target/mips/tcg/octeon_translate.c +++ b/target/mips/tcg/octeon_translate.c @@ -299,3 +299,19 @@ static bool trans_VMULU(DisasContext *ctx, arg_VMULU *a) gen_store_gpr(tmp, a->rd); return true; } + +static bool trans_VMM0(DisasContext *ctx, arg_VMM0 *a) +{ + TCGv_i64 tmp = tcg_temp_new_i64(); + + gen_load_gpr(tmp, a->rs); + tcg_gen_mul_i64(oct_mpl[0], oct_mpl[0], tmp); + gen_load_gpr(tmp, a->rt); + tcg_gen_add_i64(oct_mpl[0], oct_mpl[0], tmp); + tcg_gen_add_i64(oct_mpl[0], oct_mpl[0], oct_p[0]); + gen_store_gpr(oct_mpl[0], a->rd); + + tcg_gen_movi_i64(oct_mpl[1], 0); + octeon_zero_partial_product_state(); + return true; +} diff --git a/tests/tcg/mips/user/isa/octeon/octeon-insns.c b/tests/tcg/mips/user/isa/octeon/octeon-insns.c index 4647e47f371..9153e37e9e0 100644 --- a/tests/tcg/mips/user/isa/octeon/octeon-insns.c +++ b/tests/tcg/mips/user/isa/octeon/octeon-insns.c @@ -105,6 +105,59 @@ static uint64_t octeon_vmulu(uint64_t mpl0, uint64_t rs, uint64_t rt) return rd; } +static uint64_t octeon_vmm0(uint64_t mpl0, uint64_t p0, + uint64_t rs, uint64_t rt) +{ + uint64_t rd; + + asm volatile( + "move $8, %[mpl0]\n\t" + "move $9, $0\n\t" + ".word 0x71090008\n\t" /* mtm0 $8, $9 */ + "move $8, %[p0]\n\t" + "move $9, $0\n\t" + ".word 0x71090009\n\t" /* mtp0 $8, $9 */ + "move $8, %[rs]\n\t" + "move $9, %[rt]\n\t" + ".word 0x71095010\n\t" /* vmm0 $10, $8, $9 */ + "move %[rd], $10\n\t" + : [rd] "=r" (rd) + : [mpl0] "r" (mpl0), [p0] "r" (p0), + [rs] "r" (rs), [rt] "r" (rt) + : "$8", "$9", "$10"); + + return rd; +} + +static uint64_t octeon_vmm0_zeroes_mpl1(void) +{ + uint64_t rd; + + asm volatile( + "move $8, %[mpl0]\n\t" + "move $9, $0\n\t" + ".word 0x71090008\n\t" /* mtm0 $8, $9 */ + "move $8, %[mpl1]\n\t" + "move $9, $0\n\t" + ".word 0x7109000c\n\t" /* mtm1 $8, $9 */ + "move $8, %[vmm0_rs]\n\t" + "move $9, $0\n\t" + ".word 0x71095010\n\t" /* vmm0 $10, $8, $9 */ + "move $8, %[vmulu_rs]\n\t" + "move $9, $0\n\t" + ".word 0x7109500f\n\t" /* vmulu $10, $8, $9 */ + "move $8, $0\n\t" + "move $9, $0\n\t" + ".word 0x7109500f\n\t" /* vmulu $10, $8, $9 */ + "move %[rd], $10\n\t" + : [rd] "=r" (rd) + : [mpl0] "r" (1ULL), [mpl1] "r" (1ULL), + [vmm0_rs] "r" (2ULL), [vmulu_rs] "r" (1ULL) + : "$8", "$9", "$10"); + + return rd; +} + static uint64_t octeon_mtp0_zeroes_p1(void) { uint64_t rd; @@ -143,6 +196,8 @@ int main(void) assert(octeon_sne(0xabc, 0xabc) == 0); assert(octeon_sne(0xabc, 0xdef) == 1); assert(octeon_vmulu(5, 7, 11) == 46); + assert(octeon_vmm0(5, 13, 7, 11) == 59); + assert(octeon_vmm0_zeroes_mpl1() == 0); assert(octeon_mtp0_zeroes_p1() == 0); return 0; -- 2.53.0
