Add the helper implementation for Octeon's selector-driven DMFC2/DMTC2
crypto interface. The helpers model the base hash, AES, CRC, GFM, 3DES,
KASUMI, and SNOW3G engines, including shared-window behavior and AES
RESINP result-input staging semantics.

Keep the implementation in octeon_crypto.c so the generic MIPS helper layer
stays focused on architectural plumbing. A later patch decodes the selectors
explicitly and routes simple register transfers directly through TCG.

Signed-off-by: James Hilliard <[email protected]>

---
Changes v7 -> v8:
  - Split COP2 crypto helper implementation out of the combined COP2
    crypto core patch.
---
 target/mips/helper.h            |    2 +
 target/mips/tcg/meson.build     |    1 +
 target/mips/tcg/octeon_crypto.c | 1654 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 1657 insertions(+)

diff --git a/target/mips/helper.h b/target/mips/helper.h
index e2b83a1d19..b2ced10a4f 100644
--- a/target/mips/helper.h
+++ b/target/mips/helper.h
@@ -24,6 +24,8 @@ DEF_HELPER_FLAGS_1(dbitswap, TCG_CALL_NO_RWG_SE, tl, tl)
 DEF_HELPER_3(crc32, tl, tl, tl, i32)
 DEF_HELPER_3(crc32c, tl, tl, tl, i32)
 DEF_HELPER_FLAGS_4(rotx, TCG_CALL_NO_RWG_SE, tl, tl, i32, i32, i32)
+DEF_HELPER_2(octeon_cop2_dmfc2, i64, env, i32)
+DEF_HELPER_3(octeon_cop2_dmtc2, void, env, i64, i32)
 
 /* microMIPS functions */
 DEF_HELPER_4(lwm, void, env, tl, tl, i32)
diff --git a/target/mips/tcg/meson.build b/target/mips/tcg/meson.build
index fff9cd6c7f..4ee359874a 100644
--- a/target/mips/tcg/meson.build
+++ b/target/mips/tcg/meson.build
@@ -18,6 +18,7 @@ mips_ss.add(files(
   'lmmi_helper.c',
   'msa_helper.c',
   'msa_translate.c',
+  'octeon_crypto.c',
   'op_helper.c',
   'rel6_translate.c',
   'translate.c',
diff --git a/target/mips/tcg/octeon_crypto.c b/target/mips/tcg/octeon_crypto.c
new file mode 100644
index 0000000000..8b3260c4d6
--- /dev/null
+++ b/target/mips/tcg/octeon_crypto.c
@@ -0,0 +1,1654 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * MIPS Octeon crypto emulation helpers.
+ *
+ * Copyright (c) 2026 James Hilliard
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "internal.h"
+#include "exec/helper-proto.h"
+#include "crypto/aes.h"
+#include "crypto/clmul.h"
+#include "qemu/bitops.h"
+#include "qemu/host-utils.h"
+
+static inline void octeon_set_shared_mode(MIPSOcteonCryptoState *crypto,
+                                          MIPSOcteonSharedMode mode)
+{
+    crypto->shared_mode = mode;
+}
+
+static inline uint32_t octeon_crc_reflect32_by_byte(uint32_t v)
+{
+    return bswap32(revbit32(v));
+}
+
+static uint32_t octeon_crc_state_reflect(const MIPSOcteonCryptoState *crypto)
+{
+    return octeon_crc_reflect32_by_byte(crypto->crc_iv);
+}
+
+static void octeon_crc_set_state_reflect(MIPSOcteonCryptoState *crypto,
+                                         uint32_t state)
+{
+    crypto->crc_iv = octeon_crc_reflect32_by_byte(state);
+}
+
+static void octeon_crc_update_normal(MIPSOcteonCryptoState *crypto,
+                                     uint64_t value, unsigned int bytes)
+{
+    uint32_t crc = crypto->crc_iv;
+    uint32_t poly = crypto->crc_poly;
+
+    for (unsigned int i = 0; i < bytes; i++) {
+        uint8_t byte = value >> ((bytes - 1 - i) * 8);
+
+        crc ^= (uint32_t)byte << 24;
+        for (int bit = 0; bit < 8; bit++) {
+            if (crc & 0x80000000U) {
+                crc = (crc << 1) ^ poly;
+            } else {
+                crc <<= 1;
+            }
+        }
+    }
+
+    crypto->crc_iv = crc;
+}
+
+static void octeon_crc_update_reflect(MIPSOcteonCryptoState *crypto,
+                                      uint64_t value, unsigned int bytes)
+{
+    uint32_t crc = octeon_crc_state_reflect(crypto);
+    uint32_t poly = bswap32(crypto->crc_poly);
+
+    for (unsigned int i = 0; i < bytes; i++) {
+        uint8_t byte = value >> ((bytes - 1 - i) * 8);
+
+        crc ^= byte;
+        for (int bit = 0; bit < 8; bit++) {
+            if (crc & 1U) {
+                crc = (crc >> 1) ^ poly;
+            } else {
+                crc >>= 1;
+            }
+        }
+    }
+
+    octeon_crc_set_state_reflect(crypto, crc);
+}
+
+static uint64_t octeon_gfm_reduce64(Int128 product, uint8_t poly)
+{
+    uint64_t lo = int128_getlo(product);
+    uint64_t hi = int128_gethi(product);
+
+    while (hi) {
+        int bit = 63 - clz64(hi);
+        uint64_t shifted_poly = (uint64_t)poly << bit;
+
+        hi ^= 1ULL << bit;
+        lo ^= shifted_poly;
+        if (bit > 56) {
+            hi ^= (uint64_t)poly >> (64 - bit);
+        }
+    }
+
+    return lo;
+}
+
+static void octeon_gfm_mul64_uia2(const uint64_t x[2], const uint64_t y[2],
+                                  uint8_t poly, uint64_t out[2])
+{
+    uint64_t vx = revbit64(x[1]);
+    uint64_t vy = revbit64(y[0]);
+    Int128 product = clmul_64(vx, vy);
+    uint64_t res = octeon_gfm_reduce64(product, revbit32(poly) >> 24);
+
+    out[0] = 0;
+    out[1] = revbit64(res);
+}
+
+static void octeon_gfm_mul_reflect(MIPSOcteonCryptoState *crypto, uint64_t 
data)
+{
+    uint64_t in[2] = {
+        crypto->gfm_reflect_resinp[0] ^ crypto->gfm_reflect_xor0,
+        crypto->gfm_reflect_resinp[1] ^ data,
+    };
+
+    octeon_gfm_mul64_uia2(in, crypto->gfm_reflect_mul,
+                          crypto->gfm_poly, crypto->gfm_reflect_resinp);
+    crypto->gfm_reflect_xor0 = 0;
+}
+
+static inline void octeon_hsh_load_reg_words_be(uint64_t reg,
+                                                 uint32_t *hi, uint32_t *lo)
+{
+    uint8_t buf[8];
+
+    stq_be_p(buf, reg);
+    *hi = ldl_be_p(buf);
+    *lo = ldl_be_p(buf + 4);
+}
+
+static inline void octeon_hsh_load_reg_words_le(uint64_t reg,
+                                                 uint32_t *lo0, uint32_t *lo1)
+{
+    uint8_t buf[8];
+
+    stq_be_p(buf, reg);
+    *lo0 = ldl_le_p(buf);
+    *lo1 = ldl_le_p(buf + 4);
+}
+
+static inline uint64_t octeon_hsh_store_reg_words_be(uint32_t hi, uint32_t lo)
+{
+    uint8_t buf[8];
+
+    stl_be_p(buf, hi);
+    stl_be_p(buf + 4, lo);
+    return ldq_be_p(buf);
+}
+
+static inline uint64_t octeon_hsh_store_reg_words_le(uint32_t lo0,
+                                                      uint32_t lo1)
+{
+    uint8_t buf[8];
+
+    stl_le_p(buf, lo0);
+    stl_le_p(buf + 4, lo1);
+    return ldq_be_p(buf);
+}
+
+static void octeon_md5_transform(MIPSOcteonCryptoState *crypto)
+{
+    static const uint32_t k[64] = {
+        0xd76aa478U, 0xe8c7b756U, 0x242070dbU, 0xc1bdceeeU,
+        0xf57c0fafU, 0x4787c62aU, 0xa8304613U, 0xfd469501U,
+        0x698098d8U, 0x8b44f7afU, 0xffff5bb1U, 0x895cd7beU,
+        0x6b901122U, 0xfd987193U, 0xa679438eU, 0x49b40821U,
+        0xf61e2562U, 0xc040b340U, 0x265e5a51U, 0xe9b6c7aaU,
+        0xd62f105dU, 0x02441453U, 0xd8a1e681U, 0xe7d3fbc8U,
+        0x21e1cde6U, 0xc33707d6U, 0xf4d50d87U, 0x455a14edU,
+        0xa9e3e905U, 0xfcefa3f8U, 0x676f02d9U, 0x8d2a4c8aU,
+        0xfffa3942U, 0x8771f681U, 0x6d9d6122U, 0xfde5380cU,
+        0xa4beea44U, 0x4bdecfa9U, 0xf6bb4b60U, 0xbebfbc70U,
+        0x289b7ec6U, 0xeaa127faU, 0xd4ef3085U, 0x04881d05U,
+        0xd9d4d039U, 0xe6db99e5U, 0x1fa27cf8U, 0xc4ac5665U,
+        0xf4292244U, 0x432aff97U, 0xab9423a7U, 0xfc93a039U,
+        0x655b59c3U, 0x8f0ccc92U, 0xffeff47dU, 0x85845dd1U,
+        0x6fa87e4fU, 0xfe2ce6e0U, 0xa3014314U, 0x4e0811a1U,
+        0xf7537e82U, 0xbd3af235U, 0x2ad7d2bbU, 0xeb86d391U,
+    };
+    static const uint8_t s[64] = {
+        7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22,
+        5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20,
+        4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23,
+        6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21,
+    };
+    uint8_t block_bytes[64];
+    uint32_t m[16];
+    uint32_t a, b, c, d;
+    uint32_t aa, bb, cc, dd;
+    int i;
+
+    for (i = 0; i < 8; i++) {
+        stq_be_p(block_bytes + (i * 8), crypto->hsh_dat[i]);
+        m[i * 2] = ldl_le_p(block_bytes + (i * 8));
+        m[i * 2 + 1] = ldl_le_p(block_bytes + (i * 8) + 4);
+    }
+
+    octeon_hsh_load_reg_words_le(crypto->hsh_iv[0], &a, &b);
+    octeon_hsh_load_reg_words_le(crypto->hsh_iv[1], &c, &d);
+    aa = a;
+    bb = b;
+    cc = c;
+    dd = d;
+
+    for (i = 0; i < 64; i++) {
+        uint32_t f, g, tmp;
+
+        if (i < 16) {
+            f = (b & c) | ((~b) & d);
+            g = i;
+        } else if (i < 32) {
+            f = (d & b) | ((~d) & c);
+            g = (5 * i + 1) & 0xf;
+        } else if (i < 48) {
+            f = b ^ c ^ d;
+            g = (3 * i + 5) & 0xf;
+        } else {
+            f = c ^ (b | (~d));
+            g = (7 * i) & 0xf;
+        }
+
+        tmp = d;
+        d = c;
+        c = b;
+        b = b + rol32(a + f + k[i] + m[g], s[i]);
+        a = tmp;
+    }
+
+    a += aa;
+    b += bb;
+    c += cc;
+    d += dd;
+    crypto->hsh_iv[0] = octeon_hsh_store_reg_words_le(a, b);
+    crypto->hsh_iv[1] = octeon_hsh_store_reg_words_le(c, d);
+}
+
+static void octeon_sha1_transform(MIPSOcteonCryptoState *crypto)
+{
+    uint32_t w[80];
+    uint32_t a, b, c, d, e;
+    int i;
+
+    for (i = 0; i < 8; i++) {
+        octeon_hsh_load_reg_words_be(crypto->hsh_dat[i],
+                                      &w[i * 2], &w[i * 2 + 1]);
+    }
+    for (i = 16; i < 80; i++) {
+        w[i] = rol32(w[i - 3] ^ w[i - 8] ^ w[i - 14] ^ w[i - 16], 1);
+    }
+
+    octeon_hsh_load_reg_words_be(crypto->hsh_iv[0], &a, &b);
+    octeon_hsh_load_reg_words_be(crypto->hsh_iv[1], &c, &d);
+    e = crypto->hsh_iv[2] >> 32;
+
+    for (i = 0; i < 80; i++) {
+        uint32_t f, k, temp;
+
+        if (i < 20) {
+            f = (b & c) | ((~b) & d);
+            k = 0x5a827999;
+        } else if (i < 40) {
+            f = b ^ c ^ d;
+            k = 0x6ed9eba1;
+        } else if (i < 60) {
+            f = (b & c) | (b & d) | (c & d);
+            k = 0x8f1bbcdc;
+        } else {
+            f = b ^ c ^ d;
+            k = 0xca62c1d6;
+        }
+
+        temp = rol32(a, 5) + f + e + k + w[i];
+        e = d;
+        d = c;
+        c = rol32(b, 30);
+        b = a;
+        a = temp;
+    }
+
+    octeon_hsh_load_reg_words_be(crypto->hsh_iv[0], &w[0], &w[1]);
+    octeon_hsh_load_reg_words_be(crypto->hsh_iv[1], &w[2], &w[3]);
+    w[4] = crypto->hsh_iv[2] >> 32;
+    w[0] += a;
+    w[1] += b;
+    w[2] += c;
+    w[3] += d;
+    w[4] += e;
+    crypto->hsh_iv[0] = octeon_hsh_store_reg_words_be(w[0], w[1]);
+    crypto->hsh_iv[1] = octeon_hsh_store_reg_words_be(w[2], w[3]);
+    crypto->hsh_iv[2] = (uint64_t)w[4] << 32;
+}
+
+static void octeon_sha256_transform(MIPSOcteonCryptoState *crypto)
+{
+    static const uint32_t k[64] = {
+        0x428a2f98U, 0x71374491U, 0xb5c0fbcfU, 0xe9b5dba5U,
+        0x3956c25bU, 0x59f111f1U, 0x923f82a4U, 0xab1c5ed5U,
+        0xd807aa98U, 0x12835b01U, 0x243185beU, 0x550c7dc3U,
+        0x72be5d74U, 0x80deb1feU, 0x9bdc06a7U, 0xc19bf174U,
+        0xe49b69c1U, 0xefbe4786U, 0x0fc19dc6U, 0x240ca1ccU,
+        0x2de92c6fU, 0x4a7484aaU, 0x5cb0a9dcU, 0x76f988daU,
+        0x983e5152U, 0xa831c66dU, 0xb00327c8U, 0xbf597fc7U,
+        0xc6e00bf3U, 0xd5a79147U, 0x06ca6351U, 0x14292967U,
+        0x27b70a85U, 0x2e1b2138U, 0x4d2c6dfcU, 0x53380d13U,
+        0x650a7354U, 0x766a0abbU, 0x81c2c92eU, 0x92722c85U,
+        0xa2bfe8a1U, 0xa81a664bU, 0xc24b8b70U, 0xc76c51a3U,
+        0xd192e819U, 0xd6990624U, 0xf40e3585U, 0x106aa070U,
+        0x19a4c116U, 0x1e376c08U, 0x2748774cU, 0x34b0bcb5U,
+        0x391c0cb3U, 0x4ed8aa4aU, 0x5b9cca4fU, 0x682e6ff3U,
+        0x748f82eeU, 0x78a5636fU, 0x84c87814U, 0x8cc70208U,
+        0x90befffaU, 0xa4506cebU, 0xbef9a3f7U, 0xc67178f2U,
+    };
+    uint32_t w[64];
+    uint32_t a, b, c, d, e, f, g, h;
+    uint32_t orig[8];
+    int i;
+
+    for (i = 0; i < 8; i++) {
+        octeon_hsh_load_reg_words_be(crypto->hsh_dat[i],
+                                      &w[i * 2], &w[i * 2 + 1]);
+    }
+    for (i = 16; i < 64; i++) {
+        uint32_t s0 = ror32(w[i - 15], 7) ^
+                      ror32(w[i - 15], 18) ^
+                      (w[i - 15] >> 3);
+        uint32_t s1 = ror32(w[i - 2], 17) ^
+                      ror32(w[i - 2], 19) ^
+                      (w[i - 2] >> 10);
+        w[i] = w[i - 16] + s0 + w[i - 7] + s1;
+    }
+
+    for (i = 0; i < 4; i++) {
+        octeon_hsh_load_reg_words_be(crypto->hsh_iv[i],
+                                      &orig[i * 2], &orig[i * 2 + 1]);
+    }
+    a = orig[0];
+    b = orig[1];
+    c = orig[2];
+    d = orig[3];
+    e = orig[4];
+    f = orig[5];
+    g = orig[6];
+    h = orig[7];
+
+    for (i = 0; i < 64; i++) {
+        uint32_t s1 = ror32(e, 6) ^
+                      ror32(e, 11) ^
+                      ror32(e, 25);
+        uint32_t ch = (e & f) ^ ((~e) & g);
+        uint32_t temp1 = h + s1 + ch + k[i] + w[i];
+        uint32_t s0 = ror32(a, 2) ^
+                      ror32(a, 13) ^
+                      ror32(a, 22);
+        uint32_t maj = (a & b) ^ (a & c) ^ (b & c);
+        uint32_t temp2 = s0 + maj;
+
+        h = g;
+        g = f;
+        f = e;
+        e = d + temp1;
+        d = c;
+        c = b;
+        b = a;
+        a = temp1 + temp2;
+    }
+
+    orig[0] += a;
+    orig[1] += b;
+    orig[2] += c;
+    orig[3] += d;
+    orig[4] += e;
+    orig[5] += f;
+    orig[6] += g;
+    orig[7] += h;
+    for (i = 0; i < 4; i++) {
+        crypto->hsh_iv[i] =
+            octeon_hsh_store_reg_words_be(orig[i * 2], orig[i * 2 + 1]);
+    }
+}
+
+static void octeon_sha512_transform(MIPSOcteonCryptoState *crypto)
+{
+    static const uint64_t k[80] = {
+        0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL,
+        0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL,
+        0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL,
+        0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL,
+        0xd807aa98a3030242ULL, 0x12835b0145706fbeULL,
+        0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL,
+        0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL,
+        0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL,
+        0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL,
+        0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL,
+        0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL,
+        0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL,
+        0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL,
+        0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL,
+        0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL,
+        0x06ca6351e003826fULL, 0x142929670a0e6e70ULL,
+        0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL,
+        0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL,
+        0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL,
+        0x81c2c92e47edaee6ULL, 0x92722c851482353bULL,
+        0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL,
+        0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL,
+        0xd192e819d6ef5218ULL, 0xd69906245565a910ULL,
+        0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL,
+        0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL,
+        0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL,
+        0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL,
+        0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL,
+        0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL,
+        0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL,
+        0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL,
+        0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL,
+        0xca273eceea26619cULL, 0xd186b8c721c0c207ULL,
+        0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL,
+        0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL,
+        0x113f9804bef90daeULL, 0x1b710b35131c471bULL,
+        0x28db77f523047d84ULL, 0x32caab7b40c72493ULL,
+        0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL,
+        0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL,
+        0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL,
+    };
+    uint64_t w[80];
+    uint64_t a, b, c, d, e, f, g, h;
+    int i;
+
+    for (i = 0; i < 16; i++) {
+        w[i] = crypto->hsh_datw[i];
+    }
+    for (i = 16; i < 80; i++) {
+        uint64_t s0 = ror64(w[i - 15], 1) ^
+                      ror64(w[i - 15], 8) ^
+                      (w[i - 15] >> 7);
+        uint64_t s1 = ror64(w[i - 2], 19) ^
+                      ror64(w[i - 2], 61) ^
+                      (w[i - 2] >> 6);
+        w[i] = w[i - 16] + s0 + w[i - 7] + s1;
+    }
+
+    a = crypto->hsh_ivw[0];
+    b = crypto->hsh_ivw[1];
+    c = crypto->hsh_ivw[2];
+    d = crypto->hsh_ivw[3];
+    e = crypto->hsh_ivw[4];
+    f = crypto->hsh_ivw[5];
+    g = crypto->hsh_ivw[6];
+    h = crypto->hsh_ivw[7];
+
+    for (i = 0; i < 80; i++) {
+        uint64_t s0 = ror64(a, 28) ^
+                      ror64(a, 34) ^
+                      ror64(a, 39);
+        uint64_t s1 = ror64(e, 14) ^
+                      ror64(e, 18) ^
+                      ror64(e, 41);
+        uint64_t ch = (e & f) ^ ((~e) & g);
+        uint64_t maj = (a & b) ^ (a & c) ^ (b & c);
+        uint64_t temp1 = h + s1 + ch + k[i] + w[i];
+        uint64_t temp2 = s0 + maj;
+
+        h = g;
+        g = f;
+        f = e;
+        e = d + temp1;
+        d = c;
+        c = b;
+        b = a;
+        a = temp1 + temp2;
+    }
+
+    crypto->hsh_ivw[0] += a;
+    crypto->hsh_ivw[1] += b;
+    crypto->hsh_ivw[2] += c;
+    crypto->hsh_ivw[3] += d;
+    crypto->hsh_ivw[4] += e;
+    crypto->hsh_ivw[5] += f;
+    crypto->hsh_ivw[6] += g;
+    crypto->hsh_ivw[7] += h;
+}
+
+static void octeon_store_shared_hsh_window(MIPSOcteonCryptoState *crypto,
+                                         uint32_t sel, uint64_t value)
+{
+    switch (sel) {
+    case OCTEON_COP2_SEL_HSH_DATW0 ... OCTEON_COP2_SEL_HSH_DATW14:
+        crypto->hsh_datw[sel - OCTEON_COP2_SEL_HSH_DATW0] = value;
+        break;
+    case OCTEON_COP2_SEL_HSH_IVW0 ... OCTEON_COP2_SEL_HSH_IVW7:
+        crypto->hsh_ivw[sel - OCTEON_COP2_SEL_HSH_IVW0] = value;
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
+static const uint8_t octeon_snow3g_sr[256] = {
+    0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5,
+    0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
+    0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0,
+    0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
+    0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc,
+    0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
+    0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a,
+    0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
+    0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0,
+    0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
+    0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b,
+    0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
+    0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85,
+    0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
+    0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
+    0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
+    0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17,
+    0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
+    0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88,
+    0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
+    0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c,
+    0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
+    0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9,
+    0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
+    0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6,
+    0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
+    0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e,
+    0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
+    0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94,
+    0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
+    0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68,
+    0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16,
+};
+
+static const uint8_t octeon_snow3g_sq[256] = {
+    0x25, 0x24, 0x73, 0x67, 0xd7, 0xae, 0x5c, 0x30,
+    0xa4, 0xee, 0x6e, 0xcb, 0x7d, 0xb5, 0x82, 0xdb,
+    0xe4, 0x8e, 0x48, 0x49, 0x4f, 0x5d, 0x6a, 0x78,
+    0x70, 0x88, 0xe8, 0x5f, 0x5e, 0x84, 0x65, 0xe2,
+    0xd8, 0xe9, 0xcc, 0xed, 0x40, 0x2f, 0x11, 0x28,
+    0x57, 0xd2, 0xac, 0xe3, 0x4a, 0x15, 0x1b, 0xb9,
+    0xb2, 0x80, 0x85, 0xa6, 0x2e, 0x02, 0x47, 0x29,
+    0x07, 0x4b, 0x0e, 0xc1, 0x51, 0xaa, 0x89, 0xd4,
+    0xca, 0x01, 0x46, 0xb3, 0xef, 0xdd, 0x44, 0x7b,
+    0xc2, 0x7f, 0xbe, 0xc3, 0x9f, 0x20, 0x4c, 0x64,
+    0x83, 0xa2, 0x68, 0x42, 0x13, 0xb4, 0x41, 0xcd,
+    0xba, 0xc6, 0xbb, 0x6d, 0x4d, 0x71, 0x21, 0xf4,
+    0x8d, 0xb0, 0xe5, 0x93, 0xfe, 0x8f, 0xe6, 0xcf,
+    0x43, 0x45, 0x31, 0x22, 0x37, 0x36, 0x96, 0xfa,
+    0xbc, 0x0f, 0x08, 0x52, 0x1d, 0x55, 0x1a, 0xc5,
+    0x4e, 0x23, 0x69, 0x7a, 0x92, 0xff, 0x5b, 0x5a,
+    0xeb, 0x9a, 0x1c, 0xa9, 0xd1, 0x7e, 0x0d, 0xfc,
+    0x50, 0x8a, 0xb6, 0x62, 0xf5, 0x0a, 0xf8, 0xdc,
+    0x03, 0x3c, 0x0c, 0x39, 0xf1, 0xb8, 0xf3, 0x3d,
+    0xf2, 0xd5, 0x97, 0x66, 0x81, 0x32, 0xa0, 0x00,
+    0x06, 0xce, 0xf6, 0xea, 0xb7, 0x17, 0xf7, 0x8c,
+    0x79, 0xd6, 0xa7, 0xbf, 0x8b, 0x3f, 0x1f, 0x53,
+    0x63, 0x75, 0x35, 0x2c, 0x60, 0xfd, 0x27, 0xd3,
+    0x94, 0xa5, 0x7c, 0xa1, 0x05, 0x58, 0x2d, 0xbd,
+    0xd9, 0xc7, 0xaf, 0x6b, 0x54, 0x0b, 0xe0, 0x38,
+    0x04, 0xc8, 0x9d, 0xe7, 0x14, 0xb1, 0x87, 0x9c,
+    0xdf, 0x6f, 0xf9, 0xda, 0x2a, 0xc4, 0x59, 0x16,
+    0x74, 0x91, 0xab, 0x26, 0x61, 0x76, 0x34, 0x2b,
+    0xad, 0x99, 0xfb, 0x72, 0xec, 0x33, 0x12, 0xde,
+    0x98, 0x3b, 0xc0, 0x9b, 0x3e, 0x18, 0x10, 0x3a,
+    0x56, 0xe1, 0x77, 0xc9, 0x1e, 0x9e, 0x95, 0xa3,
+    0x90, 0x19, 0xa8, 0x6c, 0x09, 0xd0, 0xf0, 0x86,
+};
+
+static inline uint8_t octeon_snow3g_mulx(uint8_t v, uint8_t c)
+{
+    return (v & 0x80) ? ((v << 1) ^ c) : (v << 1);
+}
+
+static uint8_t octeon_snow3g_mulxpow(uint8_t v, unsigned int n, uint8_t c)
+{
+    while (n-- > 0) {
+        v = octeon_snow3g_mulx(v, c);
+    }
+    return v;
+}
+
+static inline uint32_t octeon_snow3g_pack32(uint8_t b0, uint8_t b1,
+                                            uint8_t b2, uint8_t b3)
+{
+    return ((uint32_t)b0 << 24) | ((uint32_t)b1 << 16) |
+           ((uint32_t)b2 << 8) | b3;
+}
+
+static uint32_t octeon_snow3g_mulalpha(uint8_t c)
+{
+    return octeon_snow3g_pack32(octeon_snow3g_mulxpow(c, 23, 0xa9),
+                                octeon_snow3g_mulxpow(c, 245, 0xa9),
+                                octeon_snow3g_mulxpow(c, 48, 0xa9),
+                                octeon_snow3g_mulxpow(c, 239, 0xa9));
+}
+
+static uint32_t octeon_snow3g_divalpha(uint8_t c)
+{
+    return octeon_snow3g_pack32(octeon_snow3g_mulxpow(c, 16, 0xa9),
+                                octeon_snow3g_mulxpow(c, 39, 0xa9),
+                                octeon_snow3g_mulxpow(c, 6, 0xa9),
+                                octeon_snow3g_mulxpow(c, 64, 0xa9));
+}
+
+static uint32_t octeon_snow3g_s1(uint32_t w)
+{
+    uint8_t x0 = octeon_snow3g_sr[w >> 24];
+    uint8_t x1 = octeon_snow3g_sr[(uint8_t)(w >> 16)];
+    uint8_t x2 = octeon_snow3g_sr[(uint8_t)(w >> 8)];
+    uint8_t x3 = octeon_snow3g_sr[(uint8_t)w];
+    uint8_t r0 = octeon_snow3g_mulx(x0, 0x1b) ^ x1 ^ x2 ^
+                 octeon_snow3g_mulx(x3, 0x1b) ^ x3;
+    uint8_t r1 = octeon_snow3g_mulx(x0, 0x1b) ^ x0 ^
+                 octeon_snow3g_mulx(x1, 0x1b) ^ x2 ^ x3;
+    uint8_t r2 = x0 ^ octeon_snow3g_mulx(x1, 0x1b) ^ x1 ^
+                 octeon_snow3g_mulx(x2, 0x1b) ^ x3;
+    uint8_t r3 = x0 ^ x1 ^ octeon_snow3g_mulx(x2, 0x1b) ^ x2 ^
+                 octeon_snow3g_mulx(x3, 0x1b);
+
+    return octeon_snow3g_pack32(r0, r1, r2, r3);
+}
+
+static uint32_t octeon_snow3g_s2(uint32_t w)
+{
+    uint8_t x0 = octeon_snow3g_sq[w >> 24];
+    uint8_t x1 = octeon_snow3g_sq[(uint8_t)(w >> 16)];
+    uint8_t x2 = octeon_snow3g_sq[(uint8_t)(w >> 8)];
+    uint8_t x3 = octeon_snow3g_sq[(uint8_t)w];
+    uint8_t r0 = octeon_snow3g_mulx(x0, 0x69) ^ x1 ^ x2 ^
+                 octeon_snow3g_mulx(x3, 0x69) ^ x3;
+    uint8_t r1 = octeon_snow3g_mulx(x0, 0x69) ^ x0 ^
+                 octeon_snow3g_mulx(x1, 0x69) ^ x2 ^ x3;
+    uint8_t r2 = x0 ^ octeon_snow3g_mulx(x1, 0x69) ^ x1 ^
+                 octeon_snow3g_mulx(x2, 0x69) ^ x3;
+    uint8_t r3 = x0 ^ x1 ^ octeon_snow3g_mulx(x2, 0x69) ^ x2 ^
+                 octeon_snow3g_mulx(x3, 0x69);
+
+    return octeon_snow3g_pack32(r0, r1, r2, r3);
+}
+
+static uint32_t octeon_snow3g_clock_fsm(MIPSOcteonCryptoState *crypto)
+{
+    uint32_t f = (uint32_t)(crypto->snow3g_lfsr[15] + crypto->snow3g_fsm[0]) ^
+                 crypto->snow3g_fsm[1];
+    uint32_t r = (uint32_t)(crypto->snow3g_fsm[1] +
+                            (crypto->snow3g_fsm[2] ^ crypto->snow3g_lfsr[5]));
+
+    crypto->snow3g_fsm[2] = octeon_snow3g_s2(crypto->snow3g_fsm[1]);
+    crypto->snow3g_fsm[1] = octeon_snow3g_s1(crypto->snow3g_fsm[0]);
+    crypto->snow3g_fsm[0] = r;
+    return f;
+}
+
+static void octeon_snow3g_clock_lfsr(MIPSOcteonCryptoState *crypto,
+                                     bool init_mode, uint32_t f)
+{
+    uint32_t s0 = crypto->snow3g_lfsr[0];
+    uint32_t s11 = crypto->snow3g_lfsr[11];
+    uint32_t v = (s0 << 8) ^ octeon_snow3g_mulalpha(s0 >> 24) ^
+                 crypto->snow3g_lfsr[2] ^ (s11 >> 8) ^
+                 octeon_snow3g_divalpha((uint8_t)s11);
+    int i;
+
+    if (init_mode) {
+        v ^= f;
+    }
+
+    for (i = 0; i < 15; i++) {
+        crypto->snow3g_lfsr[i] = crypto->snow3g_lfsr[i + 1];
+    }
+    crypto->snow3g_lfsr[15] = v;
+}
+
+static uint32_t octeon_snow3g_generate_word(MIPSOcteonCryptoState *crypto)
+{
+    uint32_t f = octeon_snow3g_clock_fsm(crypto);
+    uint32_t z = f ^ crypto->snow3g_lfsr[0];
+
+    octeon_snow3g_clock_lfsr(crypto, false, 0);
+    return z;
+}
+
+static void octeon_snow3g_queue_result(MIPSOcteonCryptoState *crypto)
+{
+    uint32_t z0 = octeon_snow3g_generate_word(crypto);
+    uint32_t z1 = octeon_snow3g_generate_word(crypto);
+
+    crypto->snow3g_result = ((uint64_t)z0 << 32) | z1;
+}
+
+static void octeon_snow3g_start(MIPSOcteonCryptoState *crypto, uint64_t data)
+{
+    int i;
+
+    octeon_set_shared_mode(crypto, OCTEON_SHARED_MODE_SNOW3G);
+    for (i = 0; i < 7; i++) {
+        uint64_t pair = crypto->hsh_datw[i];
+
+        crypto->snow3g_lfsr[i * 2] = pair >> 32;
+        crypto->snow3g_lfsr[i * 2 + 1] = pair;
+    }
+    crypto->snow3g_lfsr[14] = data >> 32;
+    crypto->snow3g_lfsr[15] = data;
+    memset(crypto->snow3g_fsm, 0, sizeof(crypto->snow3g_fsm));
+
+    for (i = 0; i < 32; i++) {
+        uint32_t f = octeon_snow3g_clock_fsm(crypto);
+
+        octeon_snow3g_clock_lfsr(crypto, true, f);
+    }
+
+    (void)octeon_snow3g_clock_fsm(crypto);
+    octeon_snow3g_clock_lfsr(crypto, false, 0);
+    octeon_snow3g_queue_result(crypto);
+}
+
+static void octeon_snow3g_more(MIPSOcteonCryptoState *crypto)
+{
+    octeon_set_shared_mode(crypto, OCTEON_SHARED_MODE_SNOW3G);
+    octeon_snow3g_queue_result(crypto);
+}
+
+static int octeon_aes_key_bits(const MIPSOcteonCryptoState *crypto)
+{
+    enum {
+        OCTEON_AES_KEYLEN_128 = 1,
+        OCTEON_AES_KEYLEN_192 = 2,
+        OCTEON_AES_KEYLEN_256 = 3,
+    };
+
+    switch (crypto->aes_keylen) {
+    case OCTEON_AES_KEYLEN_128:
+        return 128;
+    case OCTEON_AES_KEYLEN_192:
+        return 192;
+    case OCTEON_AES_KEYLEN_256:
+        return 256;
+    default:
+        return 0;
+    }
+}
+
+static const uint8_t octeon_des_ip[64] = {
+    58, 50, 42, 34, 26, 18, 10,  2,
+    60, 52, 44, 36, 28, 20, 12,  4,
+    62, 54, 46, 38, 30, 22, 14,  6,
+    64, 56, 48, 40, 32, 24, 16,  8,
+    57, 49, 41, 33, 25, 17,  9,  1,
+    59, 51, 43, 35, 27, 19, 11,  3,
+    61, 53, 45, 37, 29, 21, 13,  5,
+    63, 55, 47, 39, 31, 23, 15,  7,
+};
+
+static const uint8_t octeon_des_fp[64] = {
+    40,  8, 48, 16, 56, 24, 64, 32,
+    39,  7, 47, 15, 55, 23, 63, 31,
+    38,  6, 46, 14, 54, 22, 62, 30,
+    37,  5, 45, 13, 53, 21, 61, 29,
+    36,  4, 44, 12, 52, 20, 60, 28,
+    35,  3, 43, 11, 51, 19, 59, 27,
+    34,  2, 42, 10, 50, 18, 58, 26,
+    33,  1, 41,  9, 49, 17, 57, 25,
+};
+
+static const uint8_t octeon_des_e[48] = {
+    32,  1,  2,  3,  4,  5,
+     4,  5,  6,  7,  8,  9,
+     8,  9, 10, 11, 12, 13,
+    12, 13, 14, 15, 16, 17,
+    16, 17, 18, 19, 20, 21,
+    20, 21, 22, 23, 24, 25,
+    24, 25, 26, 27, 28, 29,
+    28, 29, 30, 31, 32,  1,
+};
+
+static const uint8_t octeon_des_p[32] = {
+    16,  7, 20, 21, 29, 12, 28, 17,
+     1, 15, 23, 26,  5, 18, 31, 10,
+     2,  8, 24, 14, 32, 27,  3,  9,
+    19, 13, 30,  6, 22, 11,  4, 25,
+};
+
+static const uint8_t octeon_des_pc1[56] = {
+    57, 49, 41, 33, 25, 17,  9,
+     1, 58, 50, 42, 34, 26, 18,
+    10,  2, 59, 51, 43, 35, 27,
+    19, 11,  3, 60, 52, 44, 36,
+    63, 55, 47, 39, 31, 23, 15,
+     7, 62, 54, 46, 38, 30, 22,
+    14,  6, 61, 53, 45, 37, 29,
+    21, 13,  5, 28, 20, 12,  4,
+};
+
+static const uint8_t octeon_des_pc2[48] = {
+    14, 17, 11, 24,  1,  5,
+     3, 28, 15,  6, 21, 10,
+    23, 19, 12,  4, 26,  8,
+    16,  7, 27, 20, 13,  2,
+    41, 52, 31, 37, 47, 55,
+    30, 40, 51, 45, 33, 48,
+    44, 49, 39, 56, 34, 53,
+    46, 42, 50, 36, 29, 32,
+};
+
+static const uint8_t octeon_des_rotations[16] = {
+    1, 1, 2, 2, 2, 2, 2, 2,
+    1, 2, 2, 2, 2, 2, 2, 1,
+};
+
+static const uint8_t octeon_des_sboxes[8][64] = {
+    {
+        14, 4, 13, 1,  2, 15, 11, 8,  3, 10, 6, 12, 5, 9, 0, 7,
+         0, 15, 7, 4, 14,  2, 13, 1, 10,  6, 12, 11, 9, 5, 3, 8,
+         4, 1, 14, 8, 13,  6,  2, 11, 15, 12, 9,  7, 3, 10, 5, 0,
+        15, 12, 8, 2,  4,  9,  1, 7,  5, 11, 3, 14, 10, 0, 6, 13,
+    },
+    {
+        15, 1,  8, 14, 6, 11, 3, 4,  9, 7, 2, 13, 12, 0, 5, 10,
+         3, 13, 4, 7, 15, 2,  8, 14, 12, 0, 1, 10,  6, 9, 11, 5,
+         0, 14, 7, 11, 10, 4, 13, 1,  5, 8, 12, 6,  9, 3,  2, 15,
+        13, 8, 10, 1,  3, 15, 4, 2, 11, 6, 7, 12,  0, 5, 14, 9,
+    },
+    {
+        10, 0,  9, 14, 6, 3, 15, 5,  1, 13, 12, 7, 11, 4, 2, 8,
+        13, 7,  0, 9,  3, 4, 6,  10, 2, 8,  5, 14, 12, 11, 15, 1,
+        13, 6,  4, 9,  8, 15, 3, 0, 11, 1,  2, 12,  5, 10, 14, 7,
+         1, 10, 13, 0,  6, 9, 8, 7,  4, 15, 14, 3, 11, 5,  2, 12,
+    },
+    {
+         7, 13, 14, 3,  0, 6, 9, 10, 1, 2, 8,  5, 11, 12, 4, 15,
+        13, 8,  11, 5,  6, 15, 0, 3,  4, 7, 2, 12, 1,  10, 14, 9,
+        10, 6,  9,  0, 12, 11, 7, 13, 15, 1, 3, 14, 5,  2,  8,  4,
+         3, 15, 0,  6, 10, 1, 13, 8,  9, 4, 5, 11, 12, 7,  2,  14,
+    },
+    {
+         2, 12, 4,  1,  7, 10, 11, 6,  8, 5, 3, 15, 13, 0, 14, 9,
+        14, 11, 2,  12, 4, 7,  13, 1,  5, 0, 15, 10, 3,  9, 8,  6,
+         4, 2,  1,  11, 10, 13, 7, 8, 15, 9, 12, 5,  6,  3, 0, 14,
+        11, 8,  12, 7,  1, 14, 2, 13, 6, 15, 0,  9, 10, 4, 5,  3,
+    },
+    {
+        12, 1,  10, 15, 9, 2,  6, 8,  0, 13, 3, 4, 14, 7, 5, 11,
+        10, 15, 4,  2,  7, 12, 9, 5,  6, 1,  13, 14, 0, 11, 3, 8,
+         9, 14, 15, 5,  2, 8,  12, 3,  7, 0,  4, 10, 1, 13, 11, 6,
+         4, 3,  2,  12, 9, 5,  15, 10, 11, 14, 1, 7,  6, 0,  8, 13,
+    },
+    {
+         4, 11, 2, 14, 15, 0, 8, 13, 3, 12, 9, 7,  5, 10, 6, 1,
+        13, 0,  11, 7,  4, 9, 1, 10, 14, 3, 5, 12, 2, 15, 8, 6,
+         1, 4,  11, 13, 12, 3, 7, 14, 10, 15, 6, 8,  0, 5,  9, 2,
+         6, 11, 13, 8,  1, 4, 10, 7,  9, 5,  0, 15, 14, 2,  3, 12,
+    },
+    {
+        13, 2,  8, 4,  6, 15, 11, 1, 10, 9, 3, 14, 5, 0, 12, 7,
+         1, 15, 13, 8, 10, 3,  7,  4, 12, 5, 6, 11, 0, 14, 9,  2,
+         7, 11, 4,  1,  9, 12, 14, 2, 0,  6, 10, 13, 15, 3, 5, 8,
+         2, 1,  14, 7,  4, 10, 8,  13, 15, 12, 9, 0,  3,  5, 6, 11,
+    },
+};
+
+static const uint8_t octeon_kasumi_s7[128] = {
+     54,  50,  62,  56,  22,  34,  94,  96,  38,   6,  63,  93,   2,  18,
+    123,  33,  55, 113,  39, 114,  21,  67,  65,  12,  47,  73,  46,  27,
+     25, 111, 124,  81,  53,   9, 121,  79,  52,  60,  58,  48, 101, 127,
+     40, 120, 104,  70,  71,  43,  20, 122,  72,  61,  23, 109,  13, 100,
+     77,   1,  16,   7,  82,  10, 105,  98, 117, 116,  76,  11,  89, 106,
+      0, 125, 118,  99,  86,  69,  30,  57, 126,  87, 112,  51,  17,   5,
+     95,  14,  90,  84,  91,   8,  35, 103,  32,  97,  28,  66, 102,  31,
+     26,  45,  75,   4,  85,  92,  37,  74,  80,  49,  68,  29, 115,  44,
+     64, 107, 108,  24, 110,  83,  36,  78,  42,  19,  15,  41,  88, 119,
+     59,   3,
+};
+
+static const uint16_t octeon_kasumi_s9[512] = {
+    167, 239, 161, 379, 391, 334,   9, 338,  38, 226,  48, 358, 452, 385,
+     90, 397, 183, 253, 147, 331, 415, 340,  51, 362, 306, 500, 262,  82,
+    216, 159, 356, 177, 175, 241, 489,  37, 206,  17,   0, 333,  44, 254,
+    378,  58, 143, 220,  81, 400,  95,   3, 315, 245,  54, 235, 218, 405,
+    472, 264, 172, 494, 371, 290, 399,  76, 165, 197, 395, 121, 257, 480,
+    423, 212, 240,  28, 462, 176, 406, 507, 288, 223, 501, 407, 249, 265,
+     89, 186, 221, 428, 164,  74, 440, 196, 458, 421, 350, 163, 232, 158,
+    134, 354,  13, 250, 491, 142, 191,  69, 193, 425, 152, 227, 366, 135,
+    344, 300, 276, 242, 437, 320, 113, 278,  11, 243,  87, 317,  36,  93,
+    496,  27, 487, 446, 482,  41,  68, 156, 457, 131, 326, 403, 339,  20,
+     39, 115, 442, 124, 475, 384, 508,  53, 112, 170, 479, 151, 126, 169,
+     73, 268, 279, 321, 168, 364, 363, 292,  46, 499, 393, 327, 324,  24,
+    456, 267, 157, 460, 488, 426, 309, 229, 439, 506, 208, 271, 349, 401,
+    434, 236,  16, 209, 359,  52,  56, 120, 199, 277, 465, 416, 252, 287,
+    246,   6,  83, 305, 420, 345, 153, 502,  65,  61, 244, 282, 173, 222,
+    418,  67, 386, 368, 261, 101, 476, 291, 195, 430,  49,  79, 166, 330,
+    280, 383, 373, 128, 382, 408, 155, 495, 367, 388, 274, 107, 459, 417,
+     62, 454, 132, 225, 203, 316, 234,  14, 301,  91, 503, 286, 424, 211,
+    347, 307, 140, 374,  35, 103, 125, 427,  19, 214, 453, 146, 498, 314,
+    444, 230, 256, 329, 198, 285,  50, 116,  78, 410,  10, 205, 510, 171,
+    231,  45, 139, 467,  29,  86, 505,  32,  72,  26, 342, 150, 313, 490,
+    431, 238, 411, 325, 149, 473,  40, 119, 174, 355, 185, 233, 389,  71,
+    448, 273, 372,  55, 110, 178, 322,  12, 469, 392, 369, 190,   1, 109,
+    375, 137, 181,  88,  75, 308, 260, 484,  98, 272, 370, 275, 412, 111,
+    336, 318,   4, 504, 492, 259, 304,  77, 337, 435,  21, 357, 303, 332,
+    483,  18,  47,  85,  25, 497, 474, 289, 100, 269, 296, 478, 270, 106,
+     31, 104, 433,  84, 414, 486, 394,  96,  99, 154, 511, 148, 413, 361,
+    409, 255, 162, 215, 302, 201, 266, 351, 343, 144, 441, 365, 108, 298,
+    251,  34, 182, 509, 138, 210, 335, 133, 311, 352, 328, 141, 396, 346,
+    123, 319, 450, 281, 429, 228, 443, 481,  92, 404, 485, 422, 248, 297,
+     23, 213, 130, 466,  22, 217, 283,  70, 294, 360, 419, 127, 312, 377,
+      7, 468, 194,   2, 117, 295, 463, 258, 224, 447, 247, 187,  80, 398,
+    284, 353, 105, 390, 299, 471, 470, 184,  57, 200, 348,  63, 204, 188,
+     33, 451,  97,  30, 310, 219,  94, 160, 129, 493,  64, 179, 263, 102,
+    189, 207, 114, 402, 438, 477, 387, 122, 192,  42, 381,   5, 145, 118,
+    180, 449, 293, 323, 136, 380,  43,  66,  60, 455, 341, 445, 202, 432,
+      8, 237,  15, 376, 436, 464,  59, 461,
+};
+
+static const uint16_t octeon_kasumi_constants[8] = {
+    0x0123, 0x4567, 0x89ab, 0xcdef, 0xfedc, 0xba98, 0x7654, 0x3210,
+};
+
+typedef struct OcteonKasumiSubkeys {
+    uint16_t kli1[8];
+    uint16_t kli2[8];
+    uint16_t koi1[8];
+    uint16_t koi2[8];
+    uint16_t koi3[8];
+    uint16_t kii1[8];
+    uint16_t kii2[8];
+    uint16_t kii3[8];
+} OcteonKasumiSubkeys;
+
+static uint64_t octeon_des_permute(uint64_t input, const uint8_t *table,
+                                   size_t output_bits, size_t input_bits)
+{
+    uint64_t out = 0;
+
+    for (size_t i = 0; i < output_bits; i++) {
+        unsigned src = table[i] - 1;
+
+        out = (out << 1) | ((input >> (input_bits - 1 - src)) & 1);
+    }
+    return out;
+}
+
+static uint32_t octeon_des_rotate28(uint32_t v, unsigned shift)
+{
+    return ((v << shift) | (v >> (28 - shift))) & 0x0fffffffU;
+}
+
+static void octeon_des_expand_subkeys(uint64_t key, uint64_t subkeys[16])
+{
+    uint64_t permuted = octeon_des_permute(key, octeon_des_pc1,
+                                           ARRAY_SIZE(octeon_des_pc1), 64);
+    uint32_t c = (permuted >> 28) & 0x0fffffffU;
+    uint32_t d = permuted & 0x0fffffffU;
+
+    for (int i = 0; i < 16; i++) {
+        c = octeon_des_rotate28(c, octeon_des_rotations[i]);
+        d = octeon_des_rotate28(d, octeon_des_rotations[i]);
+        subkeys[i] = octeon_des_permute(((uint64_t)c << 28) | d,
+                                        octeon_des_pc2,
+                                        ARRAY_SIZE(octeon_des_pc2), 56);
+    }
+}
+
+static uint32_t octeon_des_f(uint32_t r, uint64_t subkey)
+{
+    uint64_t expanded = octeon_des_permute(r, octeon_des_e,
+                                           ARRAY_SIZE(octeon_des_e), 32);
+    uint32_t out = 0;
+
+    expanded ^= subkey;
+    for (int i = 0; i < 8; i++) {
+        uint8_t sextet = (expanded >> (42 - i * 6)) & 0x3f;
+        uint8_t row = ((sextet & 0x20) >> 4) | (sextet & 0x01);
+        uint8_t col = (sextet >> 1) & 0x0f;
+
+        out = (out << 4) | octeon_des_sboxes[i][row * 16 + col];
+    }
+
+    return octeon_des_permute(out, octeon_des_p, ARRAY_SIZE(octeon_des_p), 32);
+}
+
+static uint64_t octeon_des_block_crypt(uint64_t block, uint64_t key,
+                                       bool encrypt)
+{
+    uint64_t subkeys[16];
+    uint64_t permuted = octeon_des_permute(block, octeon_des_ip,
+                                           ARRAY_SIZE(octeon_des_ip), 64);
+    uint32_t l = permuted >> 32;
+    uint32_t r = permuted;
+
+    octeon_des_expand_subkeys(key, subkeys);
+
+    for (int i = 0; i < 16; i++) {
+        uint32_t next = l ^ octeon_des_f(r, subkeys[encrypt ? i : 15 - i]);
+
+        l = r;
+        r = next;
+    }
+
+    return octeon_des_permute(((uint64_t)r << 32) | l,
+                              octeon_des_fp, ARRAY_SIZE(octeon_des_fp), 64);
+}
+
+static uint64_t octeon_3des_block_crypt(uint64_t block, const uint64_t keys[3],
+                                        bool encrypt)
+{
+    if (encrypt) {
+        block = octeon_des_block_crypt(block, keys[0], true);
+        block = octeon_des_block_crypt(block, keys[1], false);
+        block = octeon_des_block_crypt(block, keys[2], true);
+    } else {
+        block = octeon_des_block_crypt(block, keys[2], false);
+        block = octeon_des_block_crypt(block, keys[1], true);
+        block = octeon_des_block_crypt(block, keys[0], false);
+    }
+    return block;
+}
+
+static void octeon_3des_crypt_common(MIPSOcteonCryptoState *crypto,
+                                     uint64_t input_reg,
+                                     bool encrypt, bool cbc)
+{
+    const uint64_t keys[3] = {
+        crypto->des3_key[0],
+        crypto->des3_key[1],
+        crypto->des3_key[2],
+    };
+    uint64_t block = input_reg;
+
+    if (cbc) {
+        if (encrypt) {
+            block ^= crypto->des3_iv;
+            block = octeon_3des_block_crypt(block, keys, true);
+            crypto->des3_iv = block;
+        } else {
+            block = octeon_3des_block_crypt(block, keys, false);
+            block ^= crypto->des3_iv;
+            crypto->des3_iv = input_reg;
+        }
+    } else {
+        block = octeon_3des_block_crypt(block, keys, encrypt);
+    }
+
+    crypto->des3_result = block;
+}
+
+static inline uint16_t octeon_rol16(uint16_t value, unsigned int bits)
+{
+    return (value << bits) | (value >> (16 - bits));
+}
+
+static void octeon_kasumi_key_schedule(const uint64_t key_regs[2],
+                                       OcteonKasumiSubkeys *subkeys)
+{
+    uint16_t key[8];
+    uint16_t key_prime[8];
+
+    key[0] = key_regs[0] >> 48;
+    key[1] = key_regs[0] >> 32;
+    key[2] = key_regs[0] >> 16;
+    key[3] = key_regs[0];
+    key[4] = key_regs[1] >> 48;
+    key[5] = key_regs[1] >> 32;
+    key[6] = key_regs[1] >> 16;
+    key[7] = key_regs[1];
+
+    for (int i = 0; i < 8; i++) {
+        key_prime[i] = key[i] ^ octeon_kasumi_constants[i];
+    }
+
+    for (int i = 0; i < 8; i++) {
+        subkeys->kli1[i] = octeon_rol16(key[i], 1);
+        subkeys->kli2[i] = key_prime[(i + 2) & 7];
+        subkeys->koi1[i] = octeon_rol16(key[(i + 1) & 7], 5);
+        subkeys->koi2[i] = octeon_rol16(key[(i + 5) & 7], 8);
+        subkeys->koi3[i] = octeon_rol16(key[(i + 6) & 7], 13);
+        subkeys->kii1[i] = key_prime[(i + 4) & 7];
+        subkeys->kii2[i] = key_prime[(i + 3) & 7];
+        subkeys->kii3[i] = key_prime[(i + 7) & 7];
+    }
+}
+
+static uint16_t octeon_kasumi_fi(uint16_t in, uint16_t subkey)
+{
+    uint16_t nine = in >> 7;
+    uint16_t seven = in & 0x7f;
+
+    nine = octeon_kasumi_s9[nine] ^ seven;
+    seven = octeon_kasumi_s7[seven] ^ (nine & 0x7f);
+    seven ^= subkey >> 9;
+    nine ^= subkey & 0x1ff;
+    nine = octeon_kasumi_s9[nine] ^ seven;
+    seven = octeon_kasumi_s7[seven] ^ (nine & 0x7f);
+    return (seven << 9) | nine;
+}
+
+static uint32_t octeon_kasumi_fo(uint32_t in, int index,
+                                 const OcteonKasumiSubkeys *subkeys)
+{
+    uint16_t left = in >> 16;
+    uint16_t right = in;
+
+    left ^= subkeys->koi1[index];
+    left = octeon_kasumi_fi(left, subkeys->kii1[index]);
+    left ^= right;
+    right ^= subkeys->koi2[index];
+    right = octeon_kasumi_fi(right, subkeys->kii2[index]);
+    right ^= left;
+    left ^= subkeys->koi3[index];
+    left = octeon_kasumi_fi(left, subkeys->kii3[index]);
+    left ^= right;
+
+    return ((uint32_t)right << 16) | left;
+}
+
+static uint32_t octeon_kasumi_fl(uint32_t in, int index,
+                                 const OcteonKasumiSubkeys *subkeys)
+{
+    uint16_t left = in >> 16;
+    uint16_t right = in;
+    uint16_t a = left & subkeys->kli1[index];
+    uint16_t b;
+
+    right ^= octeon_rol16(a, 1);
+    b = right | subkeys->kli2[index];
+    left ^= octeon_rol16(b, 1);
+    return ((uint32_t)left << 16) | right;
+}
+
+static uint64_t octeon_kasumi_block_encrypt(uint64_t block,
+                                            const uint64_t key_regs[2])
+{
+    OcteonKasumiSubkeys subkeys;
+    uint32_t left = block >> 32;
+    uint32_t right = block;
+
+    octeon_kasumi_key_schedule(key_regs, &subkeys);
+
+    for (int i = 0; i < 8; ) {
+        uint32_t temp = octeon_kasumi_fl(left, i, &subkeys);
+
+        temp = octeon_kasumi_fo(temp, i++, &subkeys);
+        right ^= temp;
+        temp = octeon_kasumi_fo(right, i, &subkeys);
+        temp = octeon_kasumi_fl(temp, i++, &subkeys);
+        left ^= temp;
+    }
+
+    return ((uint64_t)left << 32) | right;
+}
+
+static void octeon_kasumi_crypt_common(MIPSOcteonCryptoState *crypto,
+                                       uint64_t input_reg, bool cbc)
+{
+    const uint64_t key_regs[2] = {
+        crypto->des3_key[0],
+        crypto->des3_key[1],
+    };
+    uint64_t block = input_reg;
+
+    if (cbc) {
+        block ^= crypto->des3_iv;
+    }
+
+    block = octeon_kasumi_block_encrypt(block, key_regs);
+    if (cbc) {
+        crypto->des3_iv = block;
+    }
+    crypto->des3_result = block;
+}
+
+static void octeon_aes_load_key(const MIPSOcteonCryptoState *crypto,
+                                uint8_t *key, size_t keylen)
+{
+    stq_be_p(key, crypto->aes_key[0]);
+    stq_be_p(key + 8, crypto->aes_key[1]);
+    if (keylen > 16) {
+        stq_be_p(key + 16, crypto->aes_key[2]);
+    }
+    if (keylen > 24) {
+        stq_be_p(key + 24, crypto->aes_key[3]);
+    }
+}
+
+static void octeon_aes_load_block(const uint64_t regs[2], uint8_t *block)
+{
+    stq_be_p(block, regs[0]);
+    stq_be_p(block + 8, regs[1]);
+}
+
+static void octeon_aes_store_block(uint64_t regs[2], const uint8_t *block)
+{
+    regs[0] = ldq_be_p(block);
+    regs[1] = ldq_be_p(block + 8);
+}
+
+static void octeon_aes_encrypt_common(MIPSOcteonCryptoState *crypto, bool cbc)
+{
+    AES_KEY key;
+    uint8_t in[16];
+    uint8_t out[16];
+    uint8_t iv[16];
+    uint8_t raw_key[32] = {};
+    int bits = octeon_aes_key_bits(crypto);
+
+    if (!bits) {
+        return;
+    }
+
+    octeon_aes_load_key(crypto, raw_key, bits / 8);
+    octeon_aes_load_block(crypto->aes_input, in);
+    if (cbc) {
+        int i;
+
+        octeon_aes_load_block(crypto->aes_iv, iv);
+        for (i = 0; i < sizeof(in); i++) {
+            in[i] ^= iv[i];
+        }
+    }
+
+    AES_set_encrypt_key(raw_key, bits, &key);
+    AES_encrypt(in, out, &key);
+    octeon_aes_store_block(crypto->aes_result, out);
+    if (cbc) {
+        octeon_aes_store_block(crypto->aes_iv, out);
+    }
+}
+
+static void octeon_aes_decrypt_common(MIPSOcteonCryptoState *crypto, bool cbc)
+{
+    AES_KEY key;
+    uint8_t in[16];
+    uint8_t out[16];
+    uint8_t iv[16];
+    uint8_t next_iv[16];
+    uint8_t raw_key[32] = {};
+    int bits = octeon_aes_key_bits(crypto);
+    int i;
+
+    if (!bits) {
+        return;
+    }
+
+    octeon_aes_load_key(crypto, raw_key, bits / 8);
+    octeon_aes_load_block(crypto->aes_input, in);
+    if (cbc) {
+        memcpy(next_iv, in, sizeof(next_iv));
+        octeon_aes_load_block(crypto->aes_iv, iv);
+    }
+
+    AES_set_decrypt_key(raw_key, bits, &key);
+    AES_decrypt(in, out, &key);
+    if (cbc) {
+        for (i = 0; i < sizeof(out); i++) {
+            out[i] ^= iv[i];
+        }
+    }
+
+    octeon_aes_store_block(crypto->aes_result, out);
+    if (cbc) {
+        octeon_aes_store_block(crypto->aes_iv, next_iv);
+    }
+}
+
+static void octeon_gfm_mul(const uint64_t x[2], const uint64_t y[2],
+                           uint16_t poly, uint64_t out[2])
+{
+    uint64_t zh = 0, zl = 0;
+    uint64_t vh = y[0], vl = y[1];
+    uint64_t rh = (uint64_t)poly << 48;
+    int i;
+
+    for (i = 0; i < 128; i++) {
+        bool bit;
+        bool lsb;
+
+        if (i < 64) {
+            bit = (x[0] >> (63 - i)) & 1;
+        } else {
+            bit = (x[1] >> (127 - i)) & 1;
+        }
+        if (bit) {
+            zh ^= vh;
+            zl ^= vl;
+        }
+
+        lsb = vl & 1;
+        vl = (vh << 63) | (vl >> 1);
+        vh >>= 1;
+        if (lsb) {
+            vh ^= rh;
+        }
+    }
+
+    out[0] = zh;
+    out[1] = zl;
+}
+
+uint64_t helper_octeon_cop2_dmfc2(CPUMIPSState *env, uint32_t sel)
+{
+    MIPSOcteonCryptoState *crypto = &env->octeon_crypto;
+
+    if (crypto->shared_mode == OCTEON_SHARED_MODE_SNOW3G) {
+        if (sel >= OCTEON_COP2_SEL_SNOW3G_LFSR0 &&
+            sel <= OCTEON_COP2_SEL_SNOW3G_LFSR7) {
+            unsigned int idx = sel - OCTEON_COP2_SEL_SNOW3G_LFSR0;
+
+            return ((uint64_t)crypto->snow3g_lfsr[idx * 2] << 32) |
+                   crypto->snow3g_lfsr[idx * 2 + 1];
+        }
+        switch (sel) {
+        case OCTEON_COP2_SEL_SNOW3G_RESULT:
+            return crypto->snow3g_result;
+        case OCTEON_COP2_SEL_SNOW3G_FSM0:
+        case OCTEON_COP2_SEL_SNOW3G_FSM1:
+        case OCTEON_COP2_SEL_SNOW3G_FSM2:
+            return crypto->snow3g_fsm[sel - OCTEON_COP2_SEL_SNOW3G_FSM0];
+        default:
+            break;
+        }
+    }
+
+    switch (sel) {
+    case OCTEON_COP2_SEL_3DES_KEY0:
+    case OCTEON_COP2_SEL_3DES_KEY1:
+    case OCTEON_COP2_SEL_3DES_KEY2:
+        return crypto->des3_key[sel - OCTEON_COP2_SEL_3DES_KEY0];
+    case OCTEON_COP2_SEL_3DES_IV:
+        return crypto->des3_iv;
+    case OCTEON_COP2_SEL_3DES_RESULT_MF:
+    case OCTEON_COP2_SEL_3DES_RESULT_MT:
+        return crypto->des3_result;
+    case OCTEON_COP2_SEL_AES_RESINP0:
+    case OCTEON_COP2_SEL_AES_RESINP1:
+        return crypto->aes_result[sel - OCTEON_COP2_SEL_AES_RESINP0];
+    case OCTEON_COP2_SEL_AES_KEY0:
+    case OCTEON_COP2_SEL_AES_KEY1:
+    case OCTEON_COP2_SEL_AES_KEY2:
+    case OCTEON_COP2_SEL_AES_KEY3:
+        return crypto->aes_key[sel - OCTEON_COP2_SEL_AES_KEY0];
+    case OCTEON_COP2_SEL_AES_KEYLENGTH:
+        return crypto->aes_keylen;
+    case OCTEON_COP2_SEL_AES_INP0:
+        return crypto->aes_input[0];
+    case OCTEON_COP2_SEL_AES_IV0:
+    case OCTEON_COP2_SEL_AES_IV1:
+        return crypto->aes_iv[sel - OCTEON_COP2_SEL_AES_IV0];
+    case OCTEON_COP2_SEL_CRC_POLYNOMIAL:
+        return crypto->crc_poly;
+    case OCTEON_COP2_SEL_CRC_IV:
+        return crypto->crc_iv;
+    case OCTEON_COP2_SEL_CRC_LEN:
+        return crypto->crc_len;
+    case OCTEON_COP2_SEL_CRC_IV_REFLECT:
+        return octeon_crc_reflect32_by_byte(crypto->crc_iv);
+    case OCTEON_COP2_SEL_HSH_DATW0:
+    case OCTEON_COP2_SEL_HSH_DATW1:
+    case OCTEON_COP2_SEL_HSH_DATW2:
+    case OCTEON_COP2_SEL_HSH_DATW3:
+    case OCTEON_COP2_SEL_HSH_DATW4:
+    case OCTEON_COP2_SEL_HSH_DATW5:
+    case OCTEON_COP2_SEL_HSH_DATW6:
+    case OCTEON_COP2_SEL_HSH_DATW7:
+    case OCTEON_COP2_SEL_HSH_DATW8:
+    case OCTEON_COP2_SEL_HSH_DATW9:
+    case OCTEON_COP2_SEL_HSH_DATW10:
+    case OCTEON_COP2_SEL_HSH_DATW11:
+    case OCTEON_COP2_SEL_HSH_DATW12:
+    case OCTEON_COP2_SEL_HSH_DATW13:
+    case OCTEON_COP2_SEL_HSH_DATW14:
+        return crypto->hsh_datw[sel - OCTEON_COP2_SEL_HSH_DATW0];
+    case OCTEON_COP2_SEL_HSH_DATW15:
+        return crypto->hsh_datw[15];
+    case OCTEON_COP2_SEL_HSH_IVW0:
+    case OCTEON_COP2_SEL_HSH_IVW1:
+    case OCTEON_COP2_SEL_HSH_IVW2:
+    case OCTEON_COP2_SEL_HSH_IVW3:
+    case OCTEON_COP2_SEL_HSH_IVW4:
+    case OCTEON_COP2_SEL_HSH_IVW5:
+    case OCTEON_COP2_SEL_HSH_IVW6:
+    case OCTEON_COP2_SEL_HSH_IVW7:
+        return crypto->hsh_ivw[sel - OCTEON_COP2_SEL_HSH_IVW0];
+    case OCTEON_COP2_SEL_HSH_IV0:
+    case OCTEON_COP2_SEL_HSH_IV1:
+    case OCTEON_COP2_SEL_HSH_IV2:
+    case OCTEON_COP2_SEL_HSH_IV3:
+        return crypto->hsh_iv[sel - OCTEON_COP2_SEL_HSH_IV0];
+    case OCTEON_COP2_SEL_GFM_MUL_REFLECT0:
+    case OCTEON_COP2_SEL_GFM_MUL_REFLECT1:
+        return crypto->gfm_reflect_mul[sel - OCTEON_COP2_SEL_GFM_MUL_REFLECT0];
+    case OCTEON_COP2_SEL_GFM_RESINP_REFLECT0:
+    case OCTEON_COP2_SEL_GFM_RESINP_REFLECT1:
+        return crypto->gfm_reflect_resinp[
+            sel - OCTEON_COP2_SEL_GFM_RESINP_REFLECT0];
+    case OCTEON_COP2_SEL_GFM_MUL0:
+    case OCTEON_COP2_SEL_GFM_MUL1:
+        return crypto->gfm_mul[sel - OCTEON_COP2_SEL_GFM_MUL0];
+    case OCTEON_COP2_SEL_GFM_RESINP0:
+    case OCTEON_COP2_SEL_GFM_RESINP1:
+        return crypto->gfm_resinp[sel - OCTEON_COP2_SEL_GFM_RESINP0];
+    case OCTEON_COP2_SEL_GFM_POLY:
+        return crypto->gfm_poly;
+    default:
+        return 0;
+    }
+}
+
+void helper_octeon_cop2_dmtc2(CPUMIPSState *env, uint64_t value,
+                              uint32_t sel)
+{
+    MIPSOcteonCryptoState *crypto = &env->octeon_crypto;
+    uint64_t data = value;
+
+    switch (sel) {
+    case OCTEON_COP2_SEL_3DES_KEY0:
+    case OCTEON_COP2_SEL_3DES_KEY1:
+    case OCTEON_COP2_SEL_3DES_KEY2:
+        crypto->des3_key[sel - OCTEON_COP2_SEL_3DES_KEY0] = data;
+        break;
+    case OCTEON_COP2_SEL_3DES_IV:
+        crypto->des3_iv = data;
+        break;
+    case OCTEON_COP2_SEL_3DES_RESULT_MT:
+        crypto->des3_result = data;
+        break;
+    case OCTEON_COP2_SEL_3DES_ENC_CBC:
+        octeon_3des_crypt_common(crypto, data, true, true);
+        break;
+    case OCTEON_COP2_SEL_KAS_ENC_CBC:
+        octeon_kasumi_crypt_common(crypto, data, true);
+        break;
+    case OCTEON_COP2_SEL_3DES_ENC:
+        octeon_3des_crypt_common(crypto, data, true, false);
+        break;
+    case OCTEON_COP2_SEL_KAS_ENC:
+        octeon_kasumi_crypt_common(crypto, data, false);
+        break;
+    case OCTEON_COP2_SEL_3DES_DEC_CBC:
+        octeon_3des_crypt_common(crypto, data, false, true);
+        break;
+    case OCTEON_COP2_SEL_3DES_DEC:
+        octeon_3des_crypt_common(crypto, data, false, false);
+        break;
+    case OCTEON_COP2_SEL_AES_RESINP0:
+    case OCTEON_COP2_SEL_AES_RESINP1:
+        crypto->aes_input[sel - OCTEON_COP2_SEL_AES_RESINP0] = data;
+        crypto->aes_result[sel - OCTEON_COP2_SEL_AES_RESINP0] = data;
+        break;
+    case OCTEON_COP2_SEL_AES_IV0:
+    case OCTEON_COP2_SEL_AES_IV1:
+        crypto->aes_iv[sel - OCTEON_COP2_SEL_AES_IV0] = data;
+        break;
+    case OCTEON_COP2_SEL_AES_KEY0:
+    case OCTEON_COP2_SEL_AES_KEY1:
+    case OCTEON_COP2_SEL_AES_KEY2:
+    case OCTEON_COP2_SEL_AES_KEY3:
+        crypto->aes_key[sel - OCTEON_COP2_SEL_AES_KEY0] = data;
+        break;
+    case OCTEON_COP2_SEL_AES_ENC_CBC0:
+    case OCTEON_COP2_SEL_AES_ENC0:
+    case OCTEON_COP2_SEL_AES_DEC_CBC0:
+    case OCTEON_COP2_SEL_AES_DEC0:
+        crypto->aes_input[0] = data;
+        break;
+    case OCTEON_COP2_SEL_AES_KEYLENGTH:
+        crypto->aes_keylen = data;
+        break;
+    case OCTEON_COP2_SEL_CRC_WRITE_POLYNOMIAL:
+    case OCTEON_COP2_SEL_CRC_WRITE_POLYNOMIAL_REFLECT:
+        crypto->crc_poly = data;
+        break;
+    case OCTEON_COP2_SEL_CRC_IV:
+        crypto->crc_iv = data;
+        break;
+    case OCTEON_COP2_SEL_CRC_WRITE_LEN:
+        crypto->crc_len = data;
+        break;
+    case OCTEON_COP2_SEL_CRC_WRITE_IV_REFLECT:
+        crypto->crc_iv = octeon_crc_reflect32_by_byte((uint32_t)data);
+        break;
+    case OCTEON_COP2_SEL_CRC_WRITE_BYTE:
+        octeon_crc_update_normal(crypto, data, 1);
+        break;
+    case OCTEON_COP2_SEL_CRC_WRITE_HALF:
+        octeon_crc_update_normal(crypto, data, 2);
+        break;
+    case OCTEON_COP2_SEL_CRC_WRITE_WORD:
+        octeon_crc_update_normal(crypto, data, 4);
+        break;
+    case OCTEON_COP2_SEL_CRC_WRITE_DWORD:
+        octeon_crc_update_normal(crypto, data, 8);
+        break;
+    case OCTEON_COP2_SEL_CRC_WRITE_VAR:
+        octeon_crc_update_normal(crypto, data, MIN(8U, crypto->crc_len));
+        break;
+    case OCTEON_COP2_SEL_CRC_WRITE_BYTE_REFLECT:
+        octeon_crc_update_reflect(crypto, data, 1);
+        break;
+    case OCTEON_COP2_SEL_CRC_WRITE_HALF_REFLECT:
+        octeon_crc_update_reflect(crypto, data, 2);
+        break;
+    case OCTEON_COP2_SEL_CRC_WRITE_WORD_REFLECT:
+        octeon_crc_update_reflect(crypto, data, 4);
+        break;
+    case OCTEON_COP2_SEL_CRC_WRITE_DWORD_REFLECT:
+        octeon_crc_update_reflect(crypto, data, 8);
+        break;
+    case OCTEON_COP2_SEL_CRC_WRITE_VAR_REFLECT:
+        octeon_crc_update_reflect(crypto, data, MIN(8U, crypto->crc_len));
+        break;
+    case OCTEON_COP2_SEL_HSH_DATW0:
+    case OCTEON_COP2_SEL_HSH_DATW1:
+    case OCTEON_COP2_SEL_HSH_DATW2:
+    case OCTEON_COP2_SEL_HSH_DATW3:
+    case OCTEON_COP2_SEL_HSH_DATW4:
+    case OCTEON_COP2_SEL_HSH_DATW5:
+    case OCTEON_COP2_SEL_HSH_DATW6:
+    case OCTEON_COP2_SEL_HSH_DATW7:
+    case OCTEON_COP2_SEL_HSH_DATW8:
+    case OCTEON_COP2_SEL_HSH_DATW9:
+    case OCTEON_COP2_SEL_HSH_DATW10:
+    case OCTEON_COP2_SEL_HSH_DATW11:
+    case OCTEON_COP2_SEL_HSH_DATW12:
+    case OCTEON_COP2_SEL_HSH_DATW13:
+    case OCTEON_COP2_SEL_HSH_DATW14:
+        octeon_store_shared_hsh_window(crypto, sel, data);
+        break;
+    case OCTEON_COP2_SEL_HSH_DATW15:
+    case OCTEON_COP2_SEL_HSH_STARTSHA512:
+        crypto->hsh_datw[15] = data;
+        octeon_set_shared_mode(crypto, OCTEON_SHARED_MODE_SHA512);
+        octeon_sha512_transform(crypto);
+        break;
+    case OCTEON_COP2_SEL_HSH_IVW0:
+    case OCTEON_COP2_SEL_HSH_IVW1:
+    case OCTEON_COP2_SEL_HSH_IVW2:
+    case OCTEON_COP2_SEL_HSH_IVW3:
+    case OCTEON_COP2_SEL_HSH_IVW4:
+    case OCTEON_COP2_SEL_HSH_IVW5:
+    case OCTEON_COP2_SEL_HSH_IVW6:
+    case OCTEON_COP2_SEL_HSH_IVW7:
+        octeon_store_shared_hsh_window(crypto, sel, data);
+        break;
+    case OCTEON_COP2_SEL_GFM_MUL_REFLECT0:
+    case OCTEON_COP2_SEL_GFM_MUL_REFLECT1:
+        crypto->gfm_reflect_mul[
+            sel - OCTEON_COP2_SEL_GFM_MUL_REFLECT0] = data;
+        break;
+    case OCTEON_COP2_SEL_GFM_XOR0_REFLECT:
+        crypto->gfm_reflect_xor0 = data;
+        break;
+    case OCTEON_COP2_SEL_GFM_MUL0:
+    case OCTEON_COP2_SEL_GFM_MUL1:
+        crypto->gfm_mul[sel - OCTEON_COP2_SEL_GFM_MUL0] = data;
+        break;
+    case OCTEON_COP2_SEL_GFM_RESINP0:
+    case OCTEON_COP2_SEL_GFM_RESINP1:
+        crypto->gfm_resinp[sel - OCTEON_COP2_SEL_GFM_RESINP0] = data;
+        break;
+    case OCTEON_COP2_SEL_GFM_XOR0:
+        crypto->gfm_xor0 = data;
+        break;
+    case OCTEON_COP2_SEL_GFM_POLY:
+        crypto->gfm_poly = data;
+        break;
+    case OCTEON_COP2_SEL_HSH_DAT0:
+    case OCTEON_COP2_SEL_HSH_DAT1:
+    case OCTEON_COP2_SEL_HSH_DAT2:
+    case OCTEON_COP2_SEL_HSH_DAT3:
+    case OCTEON_COP2_SEL_HSH_DAT4:
+    case OCTEON_COP2_SEL_HSH_DAT5:
+    case OCTEON_COP2_SEL_HSH_DAT6:
+        crypto->hsh_dat[sel - OCTEON_COP2_SEL_HSH_DAT0] = data;
+        break;
+    case OCTEON_COP2_SEL_HSH_IV0:
+    case OCTEON_COP2_SEL_HSH_IV1:
+    case OCTEON_COP2_SEL_HSH_IV2:
+    case OCTEON_COP2_SEL_HSH_IV3:
+        crypto->hsh_iv[sel - OCTEON_COP2_SEL_HSH_IV0] = data;
+        break;
+    case OCTEON_COP2_SEL_HSH_STARTMD5:
+        crypto->hsh_dat[7] = data;
+        octeon_md5_transform(crypto);
+        break;
+    case OCTEON_COP2_SEL_HSH_STARTSHA256:
+        crypto->hsh_dat[7] = data;
+        octeon_sha256_transform(crypto);
+        break;
+    case OCTEON_COP2_SEL_HSH_STARTSHA_COMPAT:
+    case OCTEON_COP2_SEL_HSH_STARTSHA:
+        crypto->hsh_dat[7] = data;
+        octeon_sha1_transform(crypto);
+        break;
+    case OCTEON_COP2_SEL_GFM_XORMUL1_REFLECT:
+        octeon_gfm_mul_reflect(crypto, data);
+        break;
+    case OCTEON_COP2_SEL_AES_ENC_CBC1:
+        crypto->aes_input[1] = data;
+        octeon_aes_encrypt_common(crypto, true);
+        break;
+    case OCTEON_COP2_SEL_AES_ENC1:
+        crypto->aes_input[1] = data;
+        octeon_aes_encrypt_common(crypto, false);
+        break;
+    case OCTEON_COP2_SEL_AES_DEC_CBC1:
+        crypto->aes_input[1] = data;
+        octeon_aes_decrypt_common(crypto, true);
+        break;
+    case OCTEON_COP2_SEL_AES_DEC1:
+        crypto->aes_input[1] = data;
+        octeon_aes_decrypt_common(crypto, false);
+        break;
+    case OCTEON_COP2_SEL_GFM_XORMUL1: {
+        uint64_t in[2] = {
+            crypto->gfm_resinp[0] ^ crypto->gfm_xor0,
+            crypto->gfm_resinp[1] ^ data,
+        };
+
+        /*
+         * A 64-bit reflected GFM operation uses this XORMUL1 path when the
+         * block is programmed with only MUL0, an 8-bit polynomial, and a zero
+         * high input half. Detect that shape and use the reflected helper
+         * instead of the normal GHASH-style multiplier.
+         */
+        if (crypto->gfm_poly <= 0xff &&
+            crypto->gfm_mul[1] == 0 &&
+            in[0] == 0) {
+            octeon_gfm_mul64_uia2(in, crypto->gfm_mul,
+                                  crypto->gfm_poly, crypto->gfm_resinp);
+        } else {
+            octeon_gfm_mul(in, crypto->gfm_mul, crypto->gfm_poly,
+                           crypto->gfm_resinp);
+        }
+        /*
+         * GFM_XOR0 is a write-only staging half consumed by the next XORMUL1
+         * operation, so clear it once the combined multiply has been issued.
+         */
+        crypto->gfm_xor0 = 0;
+        break;
+    }
+    case OCTEON_COP2_SEL_SNOW3G_START:
+        octeon_snow3g_start(crypto, data);
+        break;
+    case OCTEON_COP2_SEL_SNOW3G_MORE:
+        octeon_snow3g_more(crypto);
+        break;
+    default:
+        break;
+    }
+}

-- 
2.54.0


Reply via email to