At the risk of being a nuisance, I would
like to pursue this topic a little further:
1. I'm using aes-128-cfb for media streaming, and I
think it's a rather good choice for the job.
2. Currently, aes-128-cfb runs slower than it could (by
more than 20%, and often beyond that) and suffers from
encrypt/decrypt speed asymmetry (36 MB/sec encryption
vs. 30 MB/sec decryption on one of my machines - this can
be an issue in live media streaming).
3. In my experience with gcc on powerpc, gcc handles
large unaligned loads/stores correctly by splitting
them (sometimes unnecessarily), but the code remains
correct and in working order.
Therefore, I would like to propose a patch that uses gcc
vector intrinsics when compiled with a newer gcc and
falls back to the current version otherwise.
(I don't mind adding an x86-only condition to the "#if defined"
line, if needed.)
__________________________________________________
Do You Yahoo!?
Tired of spam? Yahoo! Mail has the best spam protection around
http://mail.yahoo.com
--- aes_cfb.c.prev 2004-12-30 21:43:33.000000000 +1100
+++ aes_cfb.c 2006-05-28 02:03:43.414593000 +1000
@@ -121,6 +121,67 @@
* 128bit block we have used is contained in *num;
*/
+#if defined (__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3))
+typedef int __v16qi __attribute__ ((mode (V16QI)));	/* GCC vector mode: 16 one-byte integers */
+
+void AES_cfb128_encrypt(const unsigned char *in, unsigned char *out,
+	const unsigned long length, const AES_KEY *key,
+	unsigned char *ivec, int *num, const int enc) {
+	/* CFB128 over `length` bytes of in -> out; enc != 0 encrypts, else decrypts. ivec and *num carry state across calls. */
+	unsigned int n;			/* offset of the next byte inside the current ivec block */
+	unsigned long l = 0;		/* number of in/out bytes handled so far */
+	unsigned char c;		/* saved ciphertext byte (decrypt paths) */
+	__v16qi t_in;
+
+	assert(in && out && key && ivec && num);
+	n = *num;
+	/* NOTE(review): the __v16qi casts below assume 16-byte vector access through in/out/ivec is safe; confirm on targets that trap on misalignment. */
+	if (enc) {
+		/* Finish the block left over from the previous call. l must advance */
+		/* past the byte that wraps n to 0, so the loop ends via its condition. */
+		for (; n && l < length; l++) {
+			ivec[n] = out[l] = ivec[n] ^ in[l];
+			n = (n + 1) % AES_BLOCK_SIZE;
+		}
+		/* Whole blocks: the ciphertext just written becomes the next ivec. */
+		for (; l + AES_BLOCK_SIZE <= length; l += AES_BLOCK_SIZE) {
+			AES_encrypt(ivec, ivec, key);
+			t_in = *(__v16qi*)(in + l);
+			*(__v16qi*)(out + l) = *(__v16qi*)ivec ^ t_in;
+			*(__v16qi*)(ivec) = *(__v16qi*)(out + l);
+		}
+		/* Trailing partial block; n is 0 whenever input remains at this point. */
+		if (l < length) AES_encrypt(ivec, ivec, key);
+		for (; l < length; l++, n++) {
+			ivec[n] = out[l] = ivec[n] ^ in[l];
+		}
+	} else {
+		/* Same three phases as above, but the value fed back into ivec is the */
+		/* input byte; c holds it so it survives the store to out[l] if in == out. */
+		for (; n && l < length; l++) {
+			c = in[l];
+			out[l] = ivec[n] ^ c;
+			ivec[n] = c;
+			n = (n + 1) % AES_BLOCK_SIZE;
+		}
+		/* Whole blocks: t_in is loaded before out is written, then becomes ivec. */
+		for (; l + AES_BLOCK_SIZE <= length; l += AES_BLOCK_SIZE) {
+			AES_encrypt(ivec, ivec, key);
+			t_in = *(__v16qi*)(in + l);
+			*(__v16qi*)(out + l) = *(__v16qi*)ivec ^ t_in;
+			*(__v16qi*)(ivec) = t_in;
+		}
+		/* Trailing partial block; n is 0 whenever input remains at this point. */
+		if (l < length) AES_encrypt(ivec, ivec, key);
+		for (; l < length; l++, n++) {
+			c = in[l];
+			out[l] = ivec[n] ^ c;
+			ivec[n] = c;
+		}
+	}
+	*num = n;
+}
+#else
void AES_cfb128_encrypt(const unsigned char *in, unsigned char *out,
const unsigned long length, const AES_KEY *key,
unsigned char *ivec, int *num, const int enc) {
@@ -155,6 +216,7 @@
*num=n;
}
+#endif
/* This expects a single block of size nbits for both in and out. Note that
it corrupts any extra bits in the last byte of out */