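Add helper support for the Octeon GFM carry-less multiply selectors. This models the normal and reflected multiplication paths: the XOR0 helpers XOR the written value into RESINP[0], while the XORMUL1 helpers XOR it into RESINP[1] and then multiply, storing the product back into the result/input (RESINP) state used by Octeon crypto code. The non-reflected XORMUL1 helper additionally keeps a 64-bit UIA2 multiply for the case where the polynomial fits in 8 bits and both MUL[1] and RESINP[0] are zero.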
Implement the reflected path with the same architectural 128-bit state model as the normal GFM helpers, avoiding target-local shadow state or a 64-bit reflected shortcut. Signed-off-by: James Hilliard <[email protected]> --- Changes v9 -> v10: - Preserve the 64-bit UIA2 GFM reduction path used by SNOW3G F9. - Rework reflected GFM helpers around the full 128-bit architectural state. - Use direct RESINP XOR operations instead of target-local XOR shadow state. Changes v8 -> v9: - Split GFM selector operations into their own COP2 helper patch. - Expose per-operation helpers instead of a generic selector helper. - Add matching helper.h declarations with the helper implementation. --- target/mips/helper.h | 4 ++ target/mips/tcg/octeon_crypto.c | 125 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 129 insertions(+) diff --git a/target/mips/helper.h b/target/mips/helper.h index e802f50fd6..7767556f79 100644 --- a/target/mips/helper.h +++ b/target/mips/helper.h @@ -38,6 +38,10 @@ DEF_HELPER_2(octeon_cp2_mt_crc_write_dword, void, env, i64) DEF_HELPER_2(octeon_cp2_mt_crc_write_var, void, env, i64) DEF_HELPER_2(octeon_cp2_mt_crc_write_dword_reflect, void, env, i64) DEF_HELPER_2(octeon_cp2_mt_crc_write_var_reflect, void, env, i64) +DEF_HELPER_2(octeon_cp2_mt_gfm_xor0_reflect, void, env, i64) +DEF_HELPER_2(octeon_cp2_mt_gfm_xor0, void, env, i64) +DEF_HELPER_2(octeon_cp2_mt_gfm_xormul1_reflect, void, env, i64) +DEF_HELPER_2(octeon_cp2_mt_gfm_xormul1, void, env, i64) /* microMIPS functions */ DEF_HELPER_4(lwm, void, env, tl, tl, i32) diff --git a/target/mips/tcg/octeon_crypto.c b/target/mips/tcg/octeon_crypto.c index 811f36f46a..d59bd8fdac 100644 --- a/target/mips/tcg/octeon_crypto.c +++ b/target/mips/tcg/octeon_crypto.c @@ -11,6 +11,7 @@ #include "internal.h" #include "exec/helper-proto.h" #include "crypto/aes.h" +#include "crypto/clmul.h" #include "crypto/sm4.h" #include "qemu/bitops.h" #include "qemu/host-utils.h" @@ -75,11 +76,135 @@ static void 
octeon_crc_update_reflect(MIPSOcteonCryptoState *crypto, octeon_crc_set_state_reflect(crypto, crc); } +static void octeon_gfm_mul(const uint64_t x[2], const uint64_t y[2], + uint16_t poly, uint64_t out[2]) +{ + uint64_t zh = 0, zl = 0; + uint64_t vh = y[0], vl = y[1]; + uint64_t rh = (uint64_t)poly << 48; + int i; + + /* + * Keep the reflected-shift formulation used by Octeon software: the + * selector polynomial is pre-positioned at the top of the high word before + * each carry reduction. + */ + for (i = 0; i < 128; i++) { + bool bit; + bool lsb; + + if (i < 64) { + bit = (x[0] >> (63 - i)) & 1; + } else { + bit = (x[1] >> (127 - i)) & 1; + } + if (bit) { + zh ^= vh; + zl ^= vl; + } + + lsb = vl & 1; + vl = (vh << 63) | (vl >> 1); + vh >>= 1; + if (lsb) { + vh ^= rh; + } + } + + out[0] = zh; + out[1] = zl; +} + +static uint64_t octeon_gfm_reduce64(Int128 product, uint8_t poly) +{ + uint64_t lo = int128_getlo(product); + uint64_t hi = int128_gethi(product); + + while (hi) { + int bit = 63 - clz64(hi); + + hi ^= 1ULL << bit; + lo ^= (uint64_t)poly << bit; + if (bit > 56) { + hi ^= (uint64_t)poly >> (64 - bit); + } + } + + return lo; +} + +static void octeon_gfm_mul64_uia2(const uint64_t x[2], const uint64_t y[2], + uint8_t poly, uint64_t out[2]) +{ + /* + * UIA2 uses the GFM datapath as a reflected 64-bit multiply in the low + * half of the 128-bit register pair. 
+ */ + uint64_t vx = revbit64(x[1]); + uint64_t vy = revbit64(y[0]); + Int128 product = clmul_64(vx, vy); + uint64_t res = octeon_gfm_reduce64(product, revbit32(poly) >> 24); + + out[0] = 0; + out[1] = revbit64(res); +} + +static void octeon_gfm_mul_reflect(MIPSOcteonCryptoState *crypto) +{ + uint64_t in[2] = { + revbit64(crypto->gfm_reflect_resinp[0]), + revbit64(crypto->gfm_reflect_resinp[1]), + }; + uint64_t mul[2] = { + revbit64(crypto->gfm_reflect_mul[0]), + revbit64(crypto->gfm_reflect_mul[1]), + }; + uint64_t out[2]; + + octeon_gfm_mul(in, mul, crypto->gfm_poly, out); + crypto->gfm_reflect_resinp[0] = revbit64(out[0]); + crypto->gfm_reflect_resinp[1] = revbit64(out[1]); +} + uint64_t helper_octeon_cp2_mf_crc_iv_reflect(CPUMIPSState *env) { return octeon_crc_reflect32_by_byte(env->octeon_crypto.crc_iv); } +void helper_octeon_cp2_mt_gfm_xor0_reflect(CPUMIPSState *env, uint64_t value) +{ + env->octeon_crypto.gfm_reflect_resinp[0] ^= value; +} + +void helper_octeon_cp2_mt_gfm_xor0(CPUMIPSState *env, uint64_t value) +{ + env->octeon_crypto.gfm_resinp[0] ^= value; +} + +void helper_octeon_cp2_mt_gfm_xormul1_reflect(CPUMIPSState *env, + uint64_t value) +{ + MIPSOcteonCryptoState *crypto = &env->octeon_crypto; + + crypto->gfm_reflect_resinp[1] ^= value; + octeon_gfm_mul_reflect(crypto); +} + +void helper_octeon_cp2_mt_gfm_xormul1(CPUMIPSState *env, uint64_t value) +{ + MIPSOcteonCryptoState *crypto = &env->octeon_crypto; + + crypto->gfm_resinp[1] ^= value; + if (crypto->gfm_poly <= 0xff && crypto->gfm_mul[1] == 0 && + crypto->gfm_resinp[0] == 0) { + octeon_gfm_mul64_uia2(crypto->gfm_resinp, crypto->gfm_mul, + crypto->gfm_poly, crypto->gfm_resinp); + } else { + octeon_gfm_mul(crypto->gfm_resinp, crypto->gfm_mul, crypto->gfm_poly, + crypto->gfm_resinp); + } +} + void helper_octeon_cp2_mt_crc_write_iv_reflect(CPUMIPSState *env, uint64_t value) { -- 2.54.0
