At the risk of being too much of an annoyance, I would
like to pursue this topic slightly further:

1. I'm using aes-128-cfb for media streaming and I
think it's a rather good choice for the job.
2. Currently, aes-128-cfb runs slower than it could (by
more than 20%, and often more) and suffers from an
encrypt/decrypt speed asymmetry (36 MB/sec encryption
vs 30 MB/sec decryption on one of my machines - this can
be an issue in live media streaming).
3. In my experience with gcc on powerpc, gcc handles
large unaligned loads/stores correctly by splitting
them (sometimes unnecessarily), but the code remains
correct and in working order.

Therefore, I would like to propose a patch that uses gcc
vector intrinsics when compiled with a newer gcc and
falls back to the current version otherwise.
(I don't mind adding an x86-only condition to the "if defined"
line, if needed).


__________________________________________________
Do You Yahoo!?
Tired of spam?  Yahoo! Mail has the best spam protection around 
http://mail.yahoo.com 
--- aes_cfb.c.prev      2004-12-30 21:43:33.000000000 +1100
+++ aes_cfb.c   2006-05-28 02:03:43.414593000 +1000
@@ -121,6 +121,67 @@
  * 128bit block we have used is contained in *num;
  */
 
+/* GCC 3.3 or later (a bare __GNUC_MINOR__ >= 3 test would reject 4.0-4.2) */
+#if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3))
+typedef int __v16qi __attribute__ ((mode (V16QI)));
+
+/*
+ * AES-CFB128 variant for GCC: whole blocks are XORed with the keystream
+ * as one 16-byte vector (assumes AES_BLOCK_SIZE == sizeof(__v16qi)).
+ *
+ *   in, out - source and destination buffers of `length` bytes
+ *   key     - key passed unchanged to AES_encrypt()
+ *   ivec    - feedback block, updated in place
+ *   num     - in/out: offset already used within the current block, so
+ *             a stream can be continued across calls
+ *   enc     - nonzero encrypts, zero decrypts
+ */
+void AES_cfb128_encrypt(const unsigned char *in, unsigned char *out,
+       const unsigned long length, const AES_KEY *key,
+       unsigned char *ivec, int *num, const int enc) {
+
+       unsigned int n;
+       unsigned long l = 0;
+       unsigned char c;
+       __v16qi ks, t_in;
+
+       assert(in && out && key && ivec && num);
+       n = *num;
+
+       /* Finish a block left partial by an earlier call.  l also advances
+        * on the byte that wraps n to 0, so that byte is not redone below. */
+       for (; n && l < length; l++) {
+               c = in[l];
+               out[l] = ivec[n] ^ c;
+               /* the feedback byte is always the ciphertext byte */
+               ivec[n] = enc ? out[l] : c;
+               n = (n + 1) % AES_BLOCK_SIZE;
+       }
+
+       /* Whole blocks.  Vectors are moved with __builtin_memcpy because
+        * nothing here shows in, out or ivec to be 16-byte aligned. */
+       for (; l + AES_BLOCK_SIZE <= length; l += AES_BLOCK_SIZE) {
+               AES_encrypt(ivec, ivec, key);
+               __builtin_memcpy(&ks, ivec, sizeof ks);
+               __builtin_memcpy(&t_in, in + l, sizeof t_in);
+               ks = ks ^ t_in;
+               __builtin_memcpy(out + l, &ks, sizeof ks);
+               /* the ciphertext block becomes the next feedback block */
+               __builtin_memcpy(ivec, enc ? &ks : &t_in, sizeof ks);
+       }
+
+       /* Trailing partial block: n is 0 whenever bytes remain here.  n is
+        * bumped in its own statement to avoid an unsequenced read/write. */
+       if (l < length) AES_encrypt(ivec, ivec, key);
+       for (; l < length; l++) {
+               c = in[l];
+               out[l] = ivec[n] ^ c;
+               ivec[n] = enc ? out[l] : c;
+               n++;
+       }
+       *num = n;
+}
+#else
 void AES_cfb128_encrypt(const unsigned char *in, unsigned char *out,
        const unsigned long length, const AES_KEY *key,
        unsigned char *ivec, int *num, const int enc) {
@@ -155,6 +216,7 @@
 
        *num=n;
 }
+#endif
 
 /* This expects a single block of size nbits for both in and out. Note that
    it corrupts any extra bits in the last byte of out */

Reply via email to