vapier      14/03/14 12:12:13

  Added:                openssh-6.5_p1-hpn-cipher-align.patch
  Log:
  Avoid unaligned loads/stores in USE=hpn cipher code #498632 by Bruno.

  (Portage version: 2.2.8-r1/cvs/Linux x86_64, signed Manifest commit with key D2E96200)
Revision  Changes    Path
1.1                  net-misc/openssh/files/openssh-6.5_p1-hpn-cipher-align.patch

file : http://sources.gentoo.org/viewvc.cgi/gentoo-x86/net-misc/openssh/files/openssh-6.5_p1-hpn-cipher-align.patch?rev=1.1&view=markup
plain: http://sources.gentoo.org/viewvc.cgi/gentoo-x86/net-misc/openssh/files/openssh-6.5_p1-hpn-cipher-align.patch?rev=1.1&content-type=text/plain

Index: openssh-6.5_p1-hpn-cipher-align.patch
===================================================================
https://bugs.gentoo.org/498632

make sure we do not use unaligned loads/stores as some arches really hate that.

--- a/cipher-ctr-mt.c
+++ b/cipher-ctr-mt.c
@@ -58,8 +58,10 @@
 /* Collect thread stats and print at cancellation when in debug mode */
 /* #define CIPHER_THREAD_STATS */
 
-/* Use single-byte XOR instead of 8-byte XOR */
-/* #define CIPHER_BYTE_XOR */
+/* Can the system do unaligned loads natively? */
+#if defined(__x86_64__) || defined(__i386__)
+# define CIPHER_UNALIGNED_OK
+#endif
 
 /*-------------------- END TUNABLES --------------------*/
 
@@ -285,8 +286,18 @@ thread_loop(void *x)
 
 static int
 ssh_aes_ctr(EVP_CIPHER_CTX *ctx, u_char *dest, const u_char *src,
-    u_int len)
+    size_t len)
 {
+	typedef union {
+		__uint128_t *u128;
+		uint64_t *u64;
+		uint32_t *u32;
+		uint8_t *u8;
+		const uint8_t *cu8;
+		uintptr_t u;
+	} ptrs_t;
+	ptrs_t destp, srcp, bufp;
+	uintptr_t align;
 	struct ssh_aes_ctr_ctx *c;
 	struct kq *q, *oldq;
 	int ridx;
@@ -301,35 +312,38 @@ ssh_aes_ctr(EVP_CIPHER_CTX *ctx, u_char *dest, const u_char *src,
 	ridx = c->ridx;
 
 	/* src already padded to block multiple */
+	srcp.cu8 = src;
+	destp.u8 = dest;
 	while (len > 0) {
 		buf = q->keys[ridx];
+		bufp.u8 = buf;
 
-#ifdef CIPHER_BYTE_XOR
-		dest[0] = src[0] ^ buf[0];
-		dest[1] = src[1] ^ buf[1];
-		dest[2] = src[2] ^ buf[2];
-		dest[3] = src[3] ^ buf[3];
-		dest[4] = src[4] ^ buf[4];
-		dest[5] = src[5] ^ buf[5];
-		dest[6] = src[6] ^ buf[6];
-		dest[7] = src[7] ^ buf[7];
-		dest[8] = src[8] ^ buf[8];
-		dest[9] = src[9] ^ buf[9];
-		dest[10] = src[10] ^ buf[10];
-		dest[11] = src[11] ^ buf[11];
-		dest[12] = src[12] ^ buf[12];
-		dest[13] = src[13] ^ buf[13];
-		dest[14] = src[14] ^ buf[14];
-		dest[15] = src[15] ^ buf[15];
+#ifdef CIPHER_UNALIGNED_OK
+		destp.u128[0] = srcp.u128[0] ^ bufp.u128[0];
 #else
-		*(uint64_t *)dest = *(uint64_t *)src ^ *(uint64_t *)buf;
-		*(uint64_t *)(dest + 8) = *(uint64_t *)(src + 8) ^
-		    *(uint64_t *)(buf + 8);
+		/* figure out the alignment on the fly */
+		align = destp.u | srcp.u | bufp.u;
+
+		if ((align & 0xf) == 0) {
+			destp.u128[0] = srcp.u128[0] ^ bufp.u128[0];
+		} else if ((align & 0x7) == 0) {
+			destp.u64[0] = srcp.u64[0] ^ bufp.u64[0];
+			destp.u64[1] = srcp.u64[1] ^ bufp.u64[1];
+		} else if ((align & 0x3) == 0) {
+			destp.u32[0] = srcp.u32[0] ^ bufp.u32[0];
+			destp.u32[1] = srcp.u32[1] ^ bufp.u32[1];
+			destp.u32[2] = srcp.u32[2] ^ bufp.u32[2];
+			destp.u32[3] = srcp.u32[3] ^ bufp.u32[3];
+		} else {
+			size_t i;
+			for (i = 0; i < AES_BLOCK_SIZE; ++i)
+				dest[i] = src[i] ^ buf[i];
+		}
 #endif
-		dest += 16;
-		src += 16;
-		len -= 16;
+		destp.u += AES_BLOCK_SIZE;
+		srcp.u += AES_BLOCK_SIZE;
+		len -= AES_BLOCK_SIZE;
 
 		ssh_ctr_inc(ctx->iv, AES_BLOCK_SIZE);
 
 		/* Increment read index, switch queues on rollover */
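
For reference outside the diff, here is a minimal standalone sketch of the
dispatch the patch introduces: OR the three buffer addresses together, then
test the low bits so a single branch chain picks the widest XOR that the
worst-aligned of the three buffers permits. The xor_block()/BLOCK_SIZE names
and the main() driver are illustrative only, not OpenSSH code, and the sketch
assumes a 64-bit GCC/Clang target where __uint128_t is available.

/* demo-xor-align.c: standalone sketch of the alignment dispatch above.
 * Build: gcc -O2 -o demo-xor-align demo-xor-align.c
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define BLOCK_SIZE 16	/* stands in for AES_BLOCK_SIZE */

/* Union of pointer views: writing one member and reading another
 * reinterprets the same address at a different access width, and the
 * uintptr_t member exposes the raw address for alignment tests. */
typedef union {
	__uint128_t *u128;
	uint64_t *u64;
	uint32_t *u32;
	uint8_t *u8;
	const uint8_t *cu8;
	uintptr_t u;
} ptrs_t;

static void
xor_block(uint8_t *dest, const uint8_t *src, uint8_t *buf)
{
	ptrs_t destp, srcp, bufp;
	uintptr_t align;

	destp.u8 = dest;
	srcp.cu8 = src;
	bufp.u8 = buf;

	/* A bit set low in any of the three addresses rules that
	 * power-of-two access width out for all of them. */
	align = destp.u | srcp.u | bufp.u;

	if ((align & 0xf) == 0) {		/* all 16-byte aligned */
		destp.u128[0] = srcp.u128[0] ^ bufp.u128[0];
	} else if ((align & 0x7) == 0) {	/* all 8-byte aligned */
		destp.u64[0] = srcp.u64[0] ^ bufp.u64[0];
		destp.u64[1] = srcp.u64[1] ^ bufp.u64[1];
	} else if ((align & 0x3) == 0) {	/* all 4-byte aligned */
		destp.u32[0] = srcp.u32[0] ^ bufp.u32[0];
		destp.u32[1] = srcp.u32[1] ^ bufp.u32[1];
		destp.u32[2] = srcp.u32[2] ^ bufp.u32[2];
		destp.u32[3] = srcp.u32[3] ^ bufp.u32[3];
	} else {				/* no usable common alignment */
		size_t i;
		for (i = 0; i < BLOCK_SIZE; ++i)
			dest[i] = src[i] ^ buf[i];
	}
}

int
main(void)
{
	/* Misalign src by one byte on purpose so the run exercises
	 * the byte-at-a-time fallback path. */
	uint8_t raw[BLOCK_SIZE + 1], keystream[BLOCK_SIZE], out[BLOCK_SIZE];
	uint8_t *src = raw + 1;
	size_t i;

	for (i = 0; i < BLOCK_SIZE; ++i) {
		src[i] = (uint8_t)i;
		keystream[i] = 0xaa;
	}
	xor_block(out, src, keystream);
	for (i = 0; i < BLOCK_SIZE; ++i)
		printf("%02x", out[i]);
	printf("\n");
	return 0;
}

Note that the sketch keeps the patch's trade-off: the wide paths read byte
buffers through larger pointer types (leaning on the compiler tolerating the
aliasing, as the HPN code does), while buffers with no usable common
alignment always fall back to the safe byte-at-a-time loop.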