Add the Octeon Camellia ROUND, FL, and FLINV selectors and model the round engine that reuses the AES RESINP bank.
Implement the Camellia F-function and FL layers directly from RFC 3713 so guest-managed key schedules can drive the engine through the hardware interface. Signed-off-by: James Hilliard <[email protected]> --- Changes v1 -> v2: - Drop the Octeon prefix from generic Camellia helper routines. (suggested by Philippe Mathieu-Daudé) - Add selector dispatch updates in octeon_translate.c after moving COP2 decode out of translate.c. (suggested by Philippe Mathieu-Daudé) Changes v5 -> v6: - Use RESINP wording for the Camellia shared selector aliases. --- target/mips/cpu.h | 9 +++ target/mips/tcg/octeon_crypto.c | 120 +++++++++++++++++++++++++++++++++++++ target/mips/tcg/octeon_translate.c | 3 + 3 files changed, 132 insertions(+) diff --git a/target/mips/cpu.h b/target/mips/cpu.h index 0b16382dd3..ba886735d5 100644 --- a/target/mips/cpu.h +++ b/target/mips/cpu.h @@ -595,6 +595,14 @@ typedef enum MIPSOcteonCop2Sel { OCTEON_COP2_SEL_AES_DEC0 = 0x010e, OCTEON_COP2_SEL_AES_KEYLENGTH = 0x0110, OCTEON_COP2_SEL_AES_INP0 = 0x0111, + /* + * Camellia reuses the AES RESINP bank and adds per-round and + * diffusion-layer selectors for the guest-managed key schedule. + */ + OCTEON_COP2_SEL_CAMELLIA_RESINP0 = OCTEON_COP2_SEL_AES_RESINP0, + OCTEON_COP2_SEL_CAMELLIA_RESINP1 = OCTEON_COP2_SEL_AES_RESINP1, + OCTEON_COP2_SEL_CAMELLIA_FL = 0x0115, + OCTEON_COP2_SEL_CAMELLIA_FLINV = 0x0116, /* * SMS4 reuses the AES RESINP, IV, and key banks and only adds * operation selectors for ECB/CBC encrypt/decrypt. @@ -696,6 +704,7 @@ typedef enum MIPSOcteonCop2Sel { OCTEON_COP2_SEL_AES_ENC1 = 0x310b, OCTEON_COP2_SEL_AES_DEC_CBC1 = 0x310d, OCTEON_COP2_SEL_AES_DEC1 = 0x310f, + OCTEON_COP2_SEL_CAMELLIA_ROUND = 0x3114, OCTEON_COP2_SEL_SMS4_ENC_CBC1 = 0x3119, OCTEON_COP2_SEL_SMS4_ENC1 = 0x311b, OCTEON_COP2_SEL_SMS4_DEC_CBC1 = 0x311d, diff --git a/target/mips/tcg/octeon_crypto.c b/target/mips/tcg/octeon_crypto.c index 30df901e6b..27e34b7f43 100644 --- a/target/mips/tcg/octeon_crypto.c +++ b/target/mips/tcg/octeon_crypto.c @@ -1650,6 +1650,117 @@ static void octeon_aes_store_block(uint64_t regs[2], const uint8_t *block) regs[1] = ldq_be_p(block + 8); } +static const uint8_t camellia_sbox1[256] = { + 112, 130, 44, 236, 179, 39, 192, 229, 228, 133, 87, 53, 234, 12, + 174, 65, 35, 239, 107, 147, 69, 25, 165, 33, 237, 14, 79, 78, + 29, 101, 146, 189, 134, 184, 175, 143, 124, 235, 31, 206, 62, 48, + 220, 95, 94, 197, 11, 26, 166, 225, 57, 202, 213, 71, 93, 61, + 217, 1, 90, 214, 81, 86, 108, 77, 139, 13, 154, 102, 251, 204, + 176, 45, 116, 18, 43, 32, 240, 177, 132, 153, 223, 76, 203, 194, + 52, 126, 118, 5, 109, 183, 169, 49, 209, 23, 4, 215, 20, 88, + 58, 97, 222, 27, 17, 28, 50, 15, 156, 22, 83, 24, 242, 34, + 254, 68, 207, 178, 195, 181, 122, 145, 36, 8, 232, 168, 96, 252, + 105, 80, 170, 208, 160, 125, 161, 137, 98, 151, 84, 91, 30, 149, + 224, 255, 100, 210, 16, 196, 0, 72, 163, 247, 117, 219, 138, 3, + 230, 218, 9, 63, 221, 148, 135, 92, 131, 2, 205, 74, 144, 51, + 115, 103, 246, 243, 157, 127, 191, 226, 82, 155, 216, 38, 200, 55, + 198, 59, 129, 150, 111, 75, 19, 190, 99, 46, 233, 121, 167, 140, + 159, 110, 188, 142, 41, 245, 249, 182, 47, 253, 180, 89, 120, 152, + 6, 106, 231, 70, 113, 186, 212, 37, 171, 66, 136, 162, 141, 250, + 114, 7, 185, 85, 248, 238, 172, 10, 54, 73, 42, 104, 60, 56, + 241, 164, 64, 40, 211, 123, 187, 201, 67, 193, 21, 227, 173, 244, + 119, 199, 128, 158, +}; + +static inline uint8_t camellia_rotl8(uint8_t v, unsigned int shift) +{ + return (v << shift) | (v >> (8 - shift)); +} + +static inline uint8_t camellia_sbox2(uint8_t x) +{ + return camellia_rotl8(camellia_sbox1[x], 1); +} + +static inline uint8_t camellia_sbox3(uint8_t x) +{ + return camellia_rotl8(camellia_sbox1[x], 7); +} + +static inline uint8_t camellia_sbox4(uint8_t x) +{ + return camellia_sbox1[camellia_rotl8(x, 1)]; +} + +static uint64_t camellia_f(uint64_t input, uint64_t key) +{ + uint64_t x = input ^ key; + uint8_t t1 = camellia_sbox1[x >> 56]; + uint8_t t2 = camellia_sbox2((x >> 48) & 0xff); + uint8_t t3 = camellia_sbox3((x >> 40) & 0xff); + uint8_t t4 = camellia_sbox4((x >> 32) & 0xff); + uint8_t t5 = camellia_sbox2((x >> 24) & 0xff); + uint8_t t6 = camellia_sbox3((x >> 16) & 0xff); + uint8_t t7 = camellia_sbox4((x >> 8) & 0xff); + uint8_t t8 = camellia_sbox1[x & 0xff]; + uint8_t y1 = t1 ^ t3 ^ t4 ^ t6 ^ t7 ^ t8; + uint8_t y2 = t1 ^ t2 ^ t4 ^ t5 ^ t7 ^ t8; + uint8_t y3 = t1 ^ t2 ^ t3 ^ t5 ^ t6 ^ t8; + uint8_t y4 = t2 ^ t3 ^ t4 ^ t5 ^ t6 ^ t7; + uint8_t y5 = t1 ^ t2 ^ t6 ^ t7 ^ t8; + uint8_t y6 = t2 ^ t3 ^ t5 ^ t7 ^ t8; + uint8_t y7 = t3 ^ t4 ^ t5 ^ t6 ^ t8; + uint8_t y8 = t1 ^ t4 ^ t5 ^ t6 ^ t7; + + return ((uint64_t)y1 << 56) | ((uint64_t)y2 << 48) | + ((uint64_t)y3 << 40) | ((uint64_t)y4 << 32) | + ((uint64_t)y5 << 24) | ((uint64_t)y6 << 16) | + ((uint64_t)y7 << 8) | y8; +} + +static uint64_t camellia_fl(uint64_t input, uint64_t key) +{ + uint32_t x1 = input >> 32; + uint32_t x2 = input; + uint32_t k1 = key >> 32; + uint32_t k2 = key; + + x2 ^= rol32(x1 & k1, 1); + x1 ^= x2 | k2; + return ((uint64_t)x1 << 32) | x2; +} + +static uint64_t camellia_flinv(uint64_t input, uint64_t key) +{ + uint32_t y1 = input >> 32; + uint32_t y2 = input; + uint32_t k1 = key >> 32; + uint32_t k2 = key; + + y1 ^= y2 | k2; + y2 ^= rol32(y1 & k1, 1); + return ((uint64_t)y1 << 32) | y2; +} + +static void octeon_camellia_round(MIPSOcteonCryptoState *crypto, uint64_t key) +{ + uint64_t left = crypto->aes_result[0]; + uint64_t right = crypto->aes_result[1]; + + crypto->aes_result[0] = right ^ camellia_f(left, key); + crypto->aes_result[1] = left; +} + +static void octeon_camellia_fl_layer(MIPSOcteonCryptoState *crypto, + uint64_t key, bool inverse) +{ + uint64_t state = crypto->aes_result[inverse ? 1 : 0]; + + crypto->aes_result[inverse ? 1 : 0] = inverse ? + camellia_flinv(state, key) : + camellia_fl(state, key); +} + static void octeon_sms4_crypt_common(MIPSOcteonCryptoState *crypto, bool encrypt, bool cbc) { @@ -2046,6 +2157,12 @@ void helper_octeon_cop2_dmtc2(CPUMIPSState *env, uint64_t value, case OCTEON_COP2_SEL_AES_KEYLENGTH: crypto->aes_keylen = data; break; + case OCTEON_COP2_SEL_CAMELLIA_FL: + octeon_camellia_fl_layer(crypto, data, false); + break; + case OCTEON_COP2_SEL_CAMELLIA_FLINV: + octeon_camellia_fl_layer(crypto, data, true); + break; case OCTEON_COP2_SEL_CRC_WRITE_POLYNOMIAL: case OCTEON_COP2_SEL_CRC_WRITE_POLYNOMIAL_REFLECT: crypto->crc_poly = data; @@ -2231,6 +2348,9 @@ void helper_octeon_cop2_dmtc2(CPUMIPSState *env, uint64_t value, crypto->aes_input[1] = data; octeon_aes_decrypt_common(crypto, false); break; + case OCTEON_COP2_SEL_CAMELLIA_ROUND: + octeon_camellia_round(crypto, data); + break; case OCTEON_COP2_SEL_SMS4_ENC_CBC1: crypto->aes_input[1] = data; octeon_sms4_crypt_common(crypto, true, true); diff --git a/target/mips/tcg/octeon_translate.c b/target/mips/tcg/octeon_translate.c index 9080d32223..a6ccdcb1df 100644 --- a/target/mips/tcg/octeon_translate.c +++ b/target/mips/tcg/octeon_translate.c @@ -111,6 +111,8 @@ static bool octeon_cop2_is_supported_dmtc2(uint16_t sel) case OCTEON_COP2_SEL_AES_DEC_CBC0: case OCTEON_COP2_SEL_AES_DEC0: case OCTEON_COP2_SEL_AES_KEYLENGTH: + case OCTEON_COP2_SEL_CAMELLIA_FL: + case OCTEON_COP2_SEL_CAMELLIA_FLINV: case OCTEON_COP2_SEL_CRC_WRITE_POLYNOMIAL: case OCTEON_COP2_SEL_CRC_IV: case OCTEON_COP2_SEL_CRC_WRITE_LEN: @@ -206,6 +208,7 @@ static bool octeon_cop2_is_supported_dmtc2(uint16_t sel) case OCTEON_COP2_SEL_AES_ENC1: case OCTEON_COP2_SEL_AES_DEC_CBC1: case OCTEON_COP2_SEL_AES_DEC1: + case OCTEON_COP2_SEL_CAMELLIA_ROUND: case OCTEON_COP2_SEL_SMS4_ENC_CBC1: case OCTEON_COP2_SEL_SMS4_ENC1: case OCTEON_COP2_SEL_SMS4_DEC_CBC1: -- 2.54.0
