From: James Hilliard <[email protected]> V3MULU extends VMULU across the full Octeon3 multiplier state, adding rt and queued partial products.
Return the low result while shifting the remaining accumulated limbs back into P[0] through P[5]. Reviewed-by: Richard Henderson <[email protected]> Signed-off-by: James Hilliard <[email protected]> Signed-off-by: Richard Henderson <[email protected]> Signed-off-by: Philippe Mathieu-Daudé <[email protected]> --- target/mips/tcg/octeon.decode | 1 + target/mips/tcg/octeon_translate.c | 40 ++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/target/mips/tcg/octeon.decode b/target/mips/tcg/octeon.decode index f9c32e1dee1..4d0ad058347 100644 --- a/target/mips/tcg/octeon.decode +++ b/target/mips/tcg/octeon.decode @@ -57,6 +57,7 @@ MTP2 011100 ..... ..... 00000 00000 001011 @r2 VMULU 011100 ..... ..... ..... 00000 001111 @r3 VMM0 011100 ..... ..... ..... 00000 010000 @r3 +V3MULU 011100 ..... ..... ..... 00000 010001 @r3 &saa base rt @saa ...... base:5 rt:5 ................ &saa diff --git a/target/mips/tcg/octeon_translate.c b/target/mips/tcg/octeon_translate.c index e51d3e6cf78..04e51538f93 100644 --- a/target/mips/tcg/octeon_translate.c +++ b/target/mips/tcg/octeon_translate.c @@ -316,3 +316,43 @@ static bool trans_VMM0(DisasContext *ctx, arg_VMM0 *a) octeon_zero_partial_product_state(); return true; } + +static bool trans_V3MULU(DisasContext *ctx, arg_V3MULU *a) +{ + TCGv_i64 x[7], y[7], z[7]; + TCGv_i64 tmp = tcg_temp_new_i64(); + + for (int i = 0; i < 7; ++i) { + z[i] = tcg_temp_new_i64(); + y[i] = tcg_temp_new_i64(); + } + memcpy(&x[0], z, 6 * sizeof(TCGv_i64)); /* x[0..5] alias z[0..5] */ + x[6] = tcg_constant_i64(0); + + /* + * Z = rs * mpl -- 64x384->448 bit multiply + * Compute even partial products into X and odd partial products into Y. + * Include RT into the odd partial products, which are 0 in bits [63:0]. 
+ */ + gen_load_gpr(tmp, a->rs); + gen_load_gpr(y[0], a->rt); + for (int i = 0; i < 6; i += 2) { + tcg_gen_mulu2_i64(x[i + 0], x[i + 1], tmp, oct_mpl[i]); + tcg_gen_mulu2_i64(y[i + 1], y[i + 2], tmp, oct_mpl[i + 1]); + } + + /* Z = X + Y: sum even and odd partial products; rt enters via y[0]. */ + tcg_gen_addN_i64(7, z, x, y); + + /* X == (0 : p5 : p4 : p3 : p2 : p1 : p0) -- x[6] is still 0 */ + memcpy(&x[0], oct_p, 6 * sizeof(TCGv_i64)); + + /* Y == (p5 : p4 : p3 : p2 : p1 : p0 : tmp) -- destination of the sum */ + memcpy(&y[1], oct_p, 6 * sizeof(TCGv_i64)); + y[0] = tmp; + + /* (p* : rd) = (0 : p*) + (rs * mpl + rt); low limb (tmp) -> rd */ + tcg_gen_addN_i64(7, y, x, z); + gen_store_gpr(tmp, a->rd); + return true; +} -- 2.53.0
