On 5/19/26 09:22, James Hilliard wrote:
--- a/target/mips/tcg/octeon_crypto.c
+++ b/target/mips/tcg/octeon_crypto.c
@@ -94,10 +94,108 @@ static void
octeon_crc_update_reflect(MIPSOcteonCryptoState *crypto,
octeon_crc_set_state_reflect(crypto, crc);
}
+static uint64_t octeon_gfm_reduce64(Int128 product, uint8_t poly)
Missing newline.
+static void octeon_gfm_mul64_uia2(const uint64_t x[2], const uint64_t y[2],
+ uint8_t poly, uint64_t out[2])
+{
+ uint64_t vx = revbit64(x[1]);
+ uint64_t vy = revbit64(y[0]);
+ Int128 product = clmul_64(vx, vy);
+ uint64_t res = octeon_gfm_reduce64(product, revbit32(poly) >> 24);
+
+ out[0] = 0;
+ out[1] = revbit64(res);
+}
+
+static void octeon_gfm_mul_reflect(MIPSOcteonCryptoState *crypto, uint64_t
data)
+{
+ uint64_t in[2] = {
+ crypto->gfm_reflect_resinp[0] ^ crypto->gfm_reflect_xor0,
+ crypto->gfm_reflect_resinp[1] ^ data,
+ };
+
+ octeon_gfm_mul64_uia2(in, crypto->gfm_reflect_mul,
+ crypto->gfm_poly, crypto->gfm_reflect_resinp);
+ crypto->gfm_reflect_xor0 = 0;
+}
This seems to be wrong. At least I don't see how it matches the pseudocode. Certainly
helper_octeon_cp2_mt_gfm_xormul1_reflect should not just handle 64 bits like this.
+static void octeon_gfm_mul(const uint64_t x[2], const uint64_t y[2],
+ uint16_t poly, uint64_t out[2])
+{
+ uint64_t zh = 0, zl = 0;
+ uint64_t vh = y[0], vl = y[1];
+ uint64_t rh = (uint64_t)poly << 48;
+ int i;
+
+ for (i = 0; i < 128; i++) {
+ bool bit;
+ bool lsb;
+
+ if (i < 64) {
+ bit = (x[0] >> (63 - i)) & 1;
+ } else {
+ bit = (x[1] >> (127 - i)) & 1;
+ }
+ if (bit) {
+ zh ^= vh;
+ zl ^= vl;
+ }
+
+ lsb = vl & 1;
+ vl = (vh << 63) | (vl >> 1);
+ vh >>= 1;
+ if (lsb) {
+ vh ^= rh;
+ }
+ }
+
+ out[0] = zh;
+ out[1] = zl;
+}
Some comments would be good here, especially since this doesn't match the
pseudocode.
There are lots of doubleword_bit_reflect in the pseudocode, which you appear to be
avoiding by reversing the direction of the shift. You appear to be swapping the read and
writeback of hi/lo to account for the individual and separate doubleword_bit_reflect wrt
the 128-bit quantity.
Your treatment of the polynomial appears to be wrong. The pseudocode places 8 bits in
POLY[0] and 8 bits in POLY[1], whereas you're placing 16 polynomial bits in POLY[0].
I think you're overthinking the function parameters. You might as well directly access
env->crypto.gfm_*.
+void helper_octeon_cp2_mt_gfm_xormul1(CPUMIPSState *env, uint64_t value)
+{
+ MIPSOcteonCryptoState *crypto = &env->octeon_crypto;
+ uint64_t in[2] = {
+ crypto->gfm_resinp[0] ^ crypto->gfm_xor0,
+ crypto->gfm_resinp[1] ^ value,
+ };
+
+ if (crypto->gfm_poly <= 0xff && crypto->gfm_mul[1] == 0 && in[0] == 0) {
+ octeon_gfm_mul64_uia2(in, crypto->gfm_mul,
+ crypto->gfm_poly, crypto->gfm_resinp);
The uia2 path is definitely non-obvious.
I don't see how you can use clmul_64 and include the polynomial. If there were to be a
clmul path, I would expect GFMPOLY == 0, so that V does not vary other than by shifting
around the GFMMULT loop.
Either way, perhaps this is premature optimization.
r~