Add per-thread Octeon multiplier state for the MPL and P limb banks used by the VMULU/VMM0/V3MULU instruction family.
Octeon3 extends the older MPL0-MPL2/P0-P2 state with high lanes MPL3-MPL5/P3-P5, programmed by the two-source MTM/MTP forms. Represent both banks as uint64_t arrays so the TC state matches the architected 64-bit limb layout used by Octeon68XX user-mode code. Expose MPL/P as global TCG variables so the multiplier translators can expand inline without helper calls. Migrate the multiplier registers in an Octeon-only subsection so non-Octeon CPU models do not grow migration state. Reviewed-by: Richard Henderson <[email protected]> Reviewed-by: Philippe Mathieu-Daudé <[email protected]> Signed-off-by: James Hilliard <[email protected]> Signed-off-by: Richard Henderson <[email protected]> --- Changes v2 -> v3: - Split the multiplier state out of the combined Octeon arithmetic and memory instruction patch. (requested by Richard Henderson) Changes v3 -> v4: - Document and keep the Octeon3 MPL3-MPL5/P3-P5 high-lane state used by the two-source MTM/MTP forms. Changes v7 -> v8: - Incorporate Richard Henderson's v7.5 global TCG variable setup for inline multiplier translation. --- target/mips/cpu.h | 12 ++++++++++++ target/mips/system/machine.c | 33 +++++++++++++++++++++++++++++++++ target/mips/tcg/translate.c | 18 ++++++++++++++++++ target/mips/tcg/translate.h | 2 ++ 4 files changed, 65 insertions(+) diff --git a/target/mips/cpu.h b/target/mips/cpu.h index b478f834c1..346713705a 100644 --- a/target/mips/cpu.h +++ b/target/mips/cpu.h @@ -459,6 +459,14 @@ typedef struct mips_def_t mips_def_t; typedef struct TCState TCState; + +/* + * Octeon3 adds a second bank of multiplier/product limbs used by the + * two-source MTM/MTP forms: MPL0..2/P0..2 from rs and MPL3..5/P3..5 from rt. + */ +#define OCTEON_MULTIPLIER_LANES 3 +#define OCTEON_MULTIPLIER_REGS (2 * OCTEON_MULTIPLIER_LANES) + struct TCState { target_ulong gpr[32]; #if defined(TARGET_MIPS64) @@ -497,6 +505,10 @@ struct TCState { target_ulong CP0_TCScheFBack; int32_t CP0_Debug_tcstatus; target_ulong CP0_UserLocal; + struct { + uint64_t MPL[OCTEON_MULTIPLIER_REGS]; + uint64_t P[OCTEON_MULTIPLIER_REGS]; + } octeon; int32_t msacsr; diff --git a/target/mips/system/machine.c b/target/mips/system/machine.c index 5880b401b0..f988b3695b 100644 --- a/target/mips/system/machine.c +++ b/target/mips/system/machine.c @@ -120,6 +120,17 @@ static const VMStateDescription vmstate_inactive_tc = { .fields = vmstate_tc_fields }; +static const VMStateDescription vmstate_octeon_multiplier_tc = { + .name = "cpu/tc/octeon_multiplier", + .version_id = 1, + .minimum_version_id = 1, + .fields = (const VMStateField[]) { + VMSTATE_UINT64_ARRAY(octeon.MPL, TCState, OCTEON_MULTIPLIER_REGS), + VMSTATE_UINT64_ARRAY(octeon.P, TCState, OCTEON_MULTIPLIER_REGS), + VMSTATE_END_OF_LIST() + } +}; + /* MVP state */ static const VMStateDescription vmstate_mvp = { @@ -247,6 +258,27 @@ static const VMStateDescription mips_vmstate_timer = { } }; +static bool mips_octeon_needed(void *opaque) +{ + MIPSCPU *cpu = opaque; + + return cpu->env.insn_flags & INSN_OCTEON; +} + +static const VMStateDescription mips_vmstate_octeon_multiplier = { + .name = "cpu/octeon_multiplier", + .version_id = 1, + .minimum_version_id = 1, + .needed = mips_octeon_needed, + .fields = (const VMStateField[]) { + VMSTATE_STRUCT(env.active_tc, MIPSCPU, 1, + vmstate_octeon_multiplier_tc, TCState), + VMSTATE_STRUCT_ARRAY(env.tcs, MIPSCPU, MIPS_SHADOW_SET_MAX, 1, + vmstate_octeon_multiplier_tc, TCState), + VMSTATE_END_OF_LIST() + } +}; + const VMStateDescription vmstate_mips_cpu = { .name = "cpu", .version_id = 21, @@ -363,6 +395,7 @@ const VMStateDescription vmstate_mips_cpu = { }, .subsections = (const VMStateDescription * const []) { &mips_vmstate_timer, + &mips_vmstate_octeon_multiplier, NULL } }; diff --git a/target/mips/tcg/translate.c b/target/mips/tcg/translate.c index dac30aff8d..123d2c89c3 100644 --- a/target/mips/tcg/translate.c +++ b/target/mips/tcg/translate.c @@ -1179,6 +1179,8 @@ static TCGv cpu_lladdr, cpu_llval; static TCGv_i32 hflags; TCGv_i32 fpu_fcr0, fpu_fcr31; TCGv_i64 fpu_f64[32]; +TCGv_i64 oct_mpl[OCTEON_MULTIPLIER_REGS]; +TCGv_i64 oct_p[OCTEON_MULTIPLIER_REGS]; static const char regnames_HI[][4] = { "HI0", "HI1", "HI2", "HI3", @@ -15276,6 +15278,22 @@ void mips_tcg_init(void) active_tc.gpr_hi[i]), rname); } + + for (unsigned i = 0; i < OCTEON_MULTIPLIER_REGS; ++i) { + static const char mpl_names[OCTEON_MULTIPLIER_REGS][5] = { + "MPL0", "MPL1", "MPL2", "MPL3", "MPL4", "MPL5", + }; + static const char p_names[OCTEON_MULTIPLIER_REGS][3] = { + "P0", "P1", "P2", "P3", "P4", "P5", + }; + + oct_mpl[i] = tcg_global_mem_new_i64( + tcg_env, offsetof(CPUMIPSState, active_tc.octeon.MPL[i]), + mpl_names[i]); + oct_p[i] = tcg_global_mem_new_i64( + tcg_env, offsetof(CPUMIPSState, active_tc.octeon.P[i]), + p_names[i]); + } #endif /* !TARGET_MIPS64 */ for (unsigned i = 0; i < 32; i++) { int off = offsetof(CPUMIPSState, active_fpu.fpr[i].wr.d[0]); diff --git a/target/mips/tcg/translate.h b/target/mips/tcg/translate.h index 89dde1e712..ab2e217367 100644 --- a/target/mips/tcg/translate.h +++ b/target/mips/tcg/translate.h @@ -189,6 +189,8 @@ void gen_crc32(DisasContext *ctx, int rd, int rs, int rt, int sz, int crc32c); extern TCGv cpu_gpr[32], cpu_PC; #if defined(TARGET_MIPS64) extern TCGv_i64 cpu_gpr_hi[32]; +extern TCGv_i64 oct_mpl[OCTEON_MULTIPLIER_REGS]; +extern TCGv_i64 oct_p[OCTEON_MULTIPLIER_REGS]; #endif extern TCGv cpu_HI[MIPS_DSP_ACC], cpu_LO[MIPS_DSP_ACC]; extern TCGv_i32 fpu_fcr0, fpu_fcr31; -- 2.54.0
