On 5/19/26 09:22, James Hilliard wrote:
--- a/target/mips/tcg/octeon_crypto.c
+++ b/target/mips/tcg/octeon_crypto.c
@@ -94,10 +94,108 @@ static void 
octeon_crc_update_reflect(MIPSOcteonCryptoState *crypto,
octeon_crc_set_state_reflect(crypto, crc);
  }
+static uint64_t octeon_gfm_reduce64(Int128 product, uint8_t poly)

Missing newline.

+static void octeon_gfm_mul64_uia2(const uint64_t x[2], const uint64_t y[2],
+                                  uint8_t poly, uint64_t out[2])
+{
+    uint64_t vx = revbit64(x[1]);
+    uint64_t vy = revbit64(y[0]);
+    Int128 product = clmul_64(vx, vy);
+    uint64_t res = octeon_gfm_reduce64(product, revbit32(poly) >> 24);
+
+    out[0] = 0;
+    out[1] = revbit64(res);
+}
+
+static void octeon_gfm_mul_reflect(MIPSOcteonCryptoState *crypto, uint64_t 
data)
+{
+    uint64_t in[2] = {
+        crypto->gfm_reflect_resinp[0] ^ crypto->gfm_reflect_xor0,
+        crypto->gfm_reflect_resinp[1] ^ data,
+    };
+
+    octeon_gfm_mul64_uia2(in, crypto->gfm_reflect_mul,
+                          crypto->gfm_poly, crypto->gfm_reflect_resinp);
+    crypto->gfm_reflect_xor0 = 0;
+}

This seems to be wrong. At least I don't see how it matches the pseudocode. Certainly helper_octeon_cp2_mt_gfm_xormul1_reflect should not just handle 64 bits like this.

+static void octeon_gfm_mul(const uint64_t x[2], const uint64_t y[2],
+                           uint16_t poly, uint64_t out[2])
+{
+    uint64_t zh = 0, zl = 0;
+    uint64_t vh = y[0], vl = y[1];
+    uint64_t rh = (uint64_t)poly << 48;
+    int i;
+
+    for (i = 0; i < 128; i++) {
+        bool bit;
+        bool lsb;
+
+        if (i < 64) {
+            bit = (x[0] >> (63 - i)) & 1;
+        } else {
+            bit = (x[1] >> (127 - i)) & 1;
+        }
+        if (bit) {
+            zh ^= vh;
+            zl ^= vl;
+        }
+
+        lsb = vl & 1;
+        vl = (vh << 63) | (vl >> 1);
+        vh >>= 1;
+        if (lsb) {
+            vh ^= rh;
+        }
+    }
+
+    out[0] = zh;
+    out[1] = zl;
+}

Some comments would be good here, especially since this doesn't match the 
pseudocode.

There are lots of doubleword_bit_reflect in the pseudocode, which you appear to be avoiding by reversing the direction of the shift. You appear to be swapping the read and writeback of hi/lo to account for the individual and separate doubleword_bit_reflect wrt the 128-bit quantity.

Your treatment of the polynomial appears to be wrong. The pseudocode places 8 bits in POLY[0] and 8 bits in POLY[1], whereas you're placing 16 polynomial bits in POLY[0].

I think you're overthinking the function parameters. You might as well directly access env->crypto.gfm_*.

+void helper_octeon_cp2_mt_gfm_xormul1(CPUMIPSState *env, uint64_t value)
+{
+    MIPSOcteonCryptoState *crypto = &env->octeon_crypto;
+    uint64_t in[2] = {
+        crypto->gfm_resinp[0] ^ crypto->gfm_xor0,
+        crypto->gfm_resinp[1] ^ value,
+    };
+
+    if (crypto->gfm_poly <= 0xff && crypto->gfm_mul[1] == 0 && in[0] == 0) {
+        octeon_gfm_mul64_uia2(in, crypto->gfm_mul,
+                              crypto->gfm_poly, crypto->gfm_resinp);

The uia2 path is definitely non-obvious.

I don't see how you can use clmul_64 and include the polynomial. If there were to be a clmul path, I would expect GFMPOLY == 0, so that V does not vary other than by shifting around the GFMMULT loop.

Either way, perhaps this is premature optimization.


r~

Reply via email to