At the risk of being a nuisance, I would like to pursue this topic a little further:
1. I'm using aes-128-cfb for media streaming, and I think it is a rather good choice for the job.
2. Currently, aes-128-cfb runs slower than it could (by more than 20%, and often beyond that) and suffers from an encrypt/decrypt speed asymmetry (36 MB/sec encryption vs 30 MB/sec decryption on one of my machines), which can be an issue in live media streaming.
3. From my experience with gcc on powerpc, gcc handles large unaligned loads/stores correctly by splitting them (sometimes unnecessarily), but the code remains correct and in working order.
Therefore, I would like to propose a patch that uses gcc vector intrinsics when compiled with a newer gcc and falls back to the current version otherwise. (I don't mind adding an x86-only condition to the "#if defined" line, if needed.)
__________________________________________________
Do You Yahoo!? Tired of spam? Yahoo! Mail has the best spam protection around http://mail.yahoo.com
--- aes_cfb.c.prev	2004-12-30 21:43:33.000000000 +1100
+++ aes_cfb.c	2006-05-28 02:03:43.414593000 +1000
@@ -121,6 +121,89 @@
  * 128bit block we have used is contained in *num;
  */
 
+#if defined(__GNUC__) && \
+	(__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3))
+/* One CFB block as a vector of 16 bytes, so the XOR is a single vector
+ * operation.  Only local temporaries have this type; they are filled and
+ * drained with __builtin_memcpy(), so in, out and ivec need no alignment. */
+typedef int aes_v16qi __attribute__ ((mode (V16QI)));
+
+/* CFB128 encryption/decryption, block-at-a-time variant for gcc >= 3.3.
+ *
+ *   in, out - input and output buffers, 'length' bytes each
+ *   key     - key schedule passed to AES_encrypt(), which is used for both
+ *             directions
+ *   ivec    - AES_BLOCK_SIZE bytes of feedback state, updated in place
+ *   num     - in/out: number of bytes of ivec already used
+ *   enc     - non-zero to encrypt, zero to decrypt
+ */
+void AES_cfb128_encrypt(const unsigned char *in, unsigned char *out,
+	const unsigned long length, const AES_KEY *key,
+	unsigned char *ivec, int *num, const int enc) {
+
+	unsigned int n;
+	unsigned long l = 0;
+	aes_v16qi v_in, v_iv;
+
+	assert(in && out && key && ivec && num);
+	n = *num;
+
+	if (enc) {
+		/* Finish the block left open by the previous call.  l is
+		 * advanced for every byte consumed, including the one that
+		 * completes the block. */
+		while (n && l < length) {
+			ivec[n] = out[l] = ivec[n] ^ in[l];
+			l++;
+			n = (n + 1) % AES_BLOCK_SIZE;
+		}
+
+		/* Whole blocks: out = E(ivec) ^ in, and the ciphertext just
+		 * produced becomes the next ivec. */
+		for (; length - l >= AES_BLOCK_SIZE; l += AES_BLOCK_SIZE) {
+			AES_encrypt(ivec, ivec, key);
+			__builtin_memcpy(&v_in, in + l, AES_BLOCK_SIZE);
+			__builtin_memcpy(&v_iv, ivec, AES_BLOCK_SIZE);
+			v_iv = v_iv ^ v_in;
+			__builtin_memcpy(out + l, &v_iv, AES_BLOCK_SIZE);
+			__builtin_memcpy(ivec, &v_iv, AES_BLOCK_SIZE);
+		}
+
+		/* Trailing partial block; n is 0 whenever input remains. */
+		if (l < length) AES_encrypt(ivec, ivec, key);
+		for (; l < length; l++, n++) {
+			ivec[n] = out[l] = ivec[n] ^ in[l];
+		}
+	} else {
+		while (n && l < length) {
+			unsigned char c = in[l];
+			out[l] = ivec[n] ^ c;
+			ivec[n] = c;
+			l++;
+			n = (n + 1) % AES_BLOCK_SIZE;
+		}
+
+		/* Whole blocks: out = E(ivec) ^ in, and the ciphertext just
+		 * consumed becomes the next ivec. */
+		for (; length - l >= AES_BLOCK_SIZE; l += AES_BLOCK_SIZE) {
+			AES_encrypt(ivec, ivec, key);
+			__builtin_memcpy(&v_in, in + l, AES_BLOCK_SIZE);
+			__builtin_memcpy(&v_iv, ivec, AES_BLOCK_SIZE);
+			v_iv = v_iv ^ v_in;
+			__builtin_memcpy(out + l, &v_iv, AES_BLOCK_SIZE);
+			__builtin_memcpy(ivec, &v_in, AES_BLOCK_SIZE);
+		}
+
+		if (l < length) AES_encrypt(ivec, ivec, key);
+		for (; l < length; l++, n++) {
+			unsigned char c = in[l];
+			out[l] = ivec[n] ^ c;
+			ivec[n] = c;
+		}
+	}
+	*num = n;
+}
+#else
 void AES_cfb128_encrypt(const unsigned char *in, unsigned char *out,
 	const unsigned long length, const AES_KEY *key,
 	unsigned char *ivec, int *num, const int enc) {
@@ -155,6 +238,7 @@
 
 	*num=n;
 }
+#endif
 
 /* This expects a single block of size nbits for both in and out. Note that
    it corrupts any extra bits in the last byte of out */