This is an automated email from the ASF dual-hosted git repository. utzig pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mynewt-core.git
commit 6897f5d37b25534ba2282dfc674831082c6534c9 Author: Fabio Utzig <ut...@apache.org> AuthorDate: Fri Jan 31 10:00:19 2020 -0300 crypto: use 32-bit XOR in CBC/CTR Using 32-bit XORs is around 10% faster with slightly increased code size (for optimized target). CBC always requires AES block length buffers, so just employ 32-bit XORs. For CTR, due to the stream semantics, use 32-bit XOR when the buffer is exactly AES block length in size; otherwise fall back to byte-by-byte XOR. Signed-off-by: Fabio Utzig <ut...@apache.org> --- hw/drivers/crypto/src/crypto.c | 69 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 62 insertions(+), 7 deletions(-) diff --git a/hw/drivers/crypto/src/crypto.c b/hw/drivers/crypto/src/crypto.c index 638b032..8409cee 100644 --- a/hw/drivers/crypto/src/crypto.c +++ b/hw/drivers/crypto/src/crypto.c @@ -19,6 +19,8 @@ #include "crypto/crypto.h" +#include <stdint.h> + /* * Implement modes using ECB for non-available HW support */ @@ -35,6 +37,12 @@ crypto_do_ctr(struct crypto_dev *crypto, const void *key, uint16_t keylen, uint8_t *inbuf8 = (uint8_t *)inbuf; uint8_t _nonce[AES_BLOCK_LEN]; uint8_t _out[AES_BLOCK_LEN]; +#if defined(__ARM_FEATURE_UNALIGNED) + /* accelerate operations doing 32-bit XORs */ + uint32_t *outbuf32; + uint32_t *inbuf32; + uint32_t *_out32 = (uint32_t *)_out; +#endif int rc; if (crypto->interface.encrypt == NULL) { @@ -56,9 +64,25 @@ crypto_do_ctr(struct crypto_dev *crypto, const void *key, uint16_t keylen, return sz + rc; } - for (i = 0; i < len; i++) { - outbuf8[i] = inbuf8[i] ^ _out[i]; +#if defined(__ARM_FEATURE_UNALIGNED) + /* + * For full blocks increase speed by doing 32-bit XOR; maintain the + * stream semantics doing byte XORs for smaller sizes (end of buffer). 
+ */ + if (len == AES_BLOCK_LEN) { + inbuf32 = (uint32_t *)inbuf8; + outbuf32 = (uint32_t *)outbuf8; + for (i = 0; i < len / 4; i++) { + outbuf32[i] = inbuf32[i] ^ _out32[i]; + } + } else { +#endif + for (i = 0; i < len; i++) { + outbuf8[i] = inbuf8[i] ^ _out[i]; + } +#if defined(__ARM_FEATURE_UNALIGNED) } +#endif for (i = AES_BLOCK_LEN; i > 0; --i) { if (++_nonce[i - 1] != 0) { @@ -86,12 +110,18 @@ crypto_do_cbc(struct crypto_dev *crypto, uint8_t op, const void *key, size_t remain; uint32_t i; uint32_t j; - uint8_t tmp[AES_BLOCK_LEN]; const uint8_t *ivp; uint8_t iv_save[AES_BLOCK_LEN * 2]; uint8_t ivpos; uint8_t *outbuf8 = (uint8_t *)outbuf; const uint8_t *inbuf8 = (const uint8_t *)inbuf; +#if defined(__ARM_FEATURE_UNALIGNED) + uint32_t tmp32[AES_BLOCK_LEN / 4]; + uint32_t *outbuf32 = (uint32_t *)outbuf; + const uint32_t *inbuf32 = (uint32_t *)inbuf; +#else + uint8_t tmp[AES_BLOCK_LEN]; +#endif bool inplace; int rc; @@ -118,13 +148,25 @@ crypto_do_cbc(struct crypto_dev *crypto, uint8_t op, const void *key, } if (op == CRYPTO_OP_ENCRYPT) { +#if defined(__ARM_FEATURE_UNALIGNED) + for (j = 0; j < AES_BLOCK_LEN / 4; j++) { + tmp32[j] = ((uint32_t *)ivp)[j] ^ inbuf32[(i / 4) + j]; + } +#else for (j = 0; j < AES_BLOCK_LEN; j++) { tmp[j] = ivp[j] ^ inbuf8[j+i]; } +#endif rc = crypto->interface.encrypt(crypto, CRYPTO_ALGO_AES, - CRYPTO_MODE_ECB, (const uint8_t *)key, keylen, NULL, tmp, - &outbuf8[i], AES_BLOCK_LEN); + CRYPTO_MODE_ECB, + (const uint8_t *)key, keylen, NULL, +#if defined(__ARM_FEATURE_UNALIGNED) + (uint8_t *)tmp32, +#else + tmp, +#endif + &outbuf8[i], AES_BLOCK_LEN); if (rc != AES_BLOCK_LEN) { return rc; } @@ -132,8 +174,15 @@ crypto_do_cbc(struct crypto_dev *crypto, uint8_t op, const void *key, ivp = &outbuf8[i]; } else { rc = crypto->interface.decrypt(crypto, CRYPTO_ALGO_AES, - CRYPTO_MODE_ECB, (const uint8_t *)key, keylen, NULL, - &inbuf8[i], tmp, AES_BLOCK_LEN); + CRYPTO_MODE_ECB, + (const uint8_t *)key, keylen, NULL, + &inbuf8[i], +#if 
defined(__ARM_FEATURE_UNALIGNED) + (uint8_t *)tmp32, +#else + tmp, +#endif + AES_BLOCK_LEN); if (rc != AES_BLOCK_LEN) { return rc; } @@ -142,9 +191,15 @@ crypto_do_cbc(struct crypto_dev *crypto, uint8_t op, const void *key, memcpy(&iv_save[ivpos], &inbuf8[i], AES_BLOCK_LEN); } +#if defined(__ARM_FEATURE_UNALIGNED) + for (j = 0; j < AES_BLOCK_LEN / 4; j++) { + outbuf32[(i / 4) + j] = ((uint32_t *)ivp)[j] ^ tmp32[j]; + } +#else for (j = 0; j < AES_BLOCK_LEN; j++) { outbuf8[i+j] = ivp[j] ^ tmp[j]; } +#endif if (inplace) { ivp = &iv_save[ivpos];