http://gcc.gnu.org/bugzilla/show_bug.cgi?id=55278



Marek Polacek <mpolacek at gcc dot gnu.org> changed:



           What    |Removed                     |Added

----------------------------------------------------------------------------

                 CC|                            |mpolacek at gcc dot gnu.org



--- Comment #3 from Marek Polacek <mpolacek at gcc dot gnu.org> 2013-05-07 
22:28:56 UTC ---

Reduced by Jakub to this:



/* Lookup tables read by FI.  Neither has an initializer, so as file-scope
   objects they start out zero-filled; nothing in this testcase writes to
   them.  */
unsigned char KASUMI_SBOX_S7[128];

unsigned short KASUMI_SBOX_S9[512];



/* Rotate INPUT left by ROT bit positions within the width of an
   unsigned short.

   The two shifts are evaluated after integer promotion of INPUT; the
   combined value is narrowed back to unsigned short when returned.  */
static inline unsigned short
rotate_left (unsigned short input, unsigned long rot)
{
  /* Bits pushed up by the left shift.  */
  const int upper = input << rot;
  /* Bits that wrap around to the bottom of the word.  */
  const int lower = input >> (8 * sizeof input - rot);

  return upper | lower;
}



/* Return VAL rotated left by eight bit positions (via rotate_left).
   For a 16-bit unsigned short this exchanges its two bytes.  */
static inline unsigned short
reverse_bytes (unsigned short val)
{
  const unsigned short swapped = rotate_left (val, 8);

  return swapped;
}



/* Fetch the single byte IN[OFF], widen it to unsigned short and return
   the result of passing it through reverse_bytes.

   NOTE(review): only one byte is read per call; this looks like an
   artefact of the testcase reduction and is kept as-is.  */
static inline unsigned short
load_be (const unsigned char in[], unsigned long off)
{
  const unsigned char *src = in + off;
  const unsigned short raw = *src;

  return reverse_bytes (raw);
}



/* Pass IN through reverse_bytes and store the result in OUT[0].

   The store narrows the 16-bit value to unsigned char, so only its low
   eight bits are kept; OUT[1] is never written.  */
static inline void
store_be (unsigned short in, unsigned char out[2])
{
  const unsigned short swapped = reverse_bytes (in);

  out[0] = (unsigned char) swapped;
}



/* Hand X0, X1, X2 and X3, in that order, to store_be.  The destination
   starts at OUT and advances by sizeof (unsigned short) bytes after each
   value.  */
static inline void
store_be4 (unsigned char out[], unsigned short x0, unsigned short x1,
           unsigned short x2, unsigned short x3)
{
  unsigned char *dst = out;

  store_be (x0, dst);
  dst += sizeof (unsigned short);

  store_be (x1, dst);
  dst += sizeof (unsigned short);

  store_be (x2, dst);
  dst += sizeof (unsigned short);

  store_be (x3, dst);
}



/* Mix the word I with the key word K using the KASUMI_SBOX_S9 and
   KASUMI_SBOX_S7 lookup tables.

   I is split into an upper part (I >> 7) and its low seven bits.  Each
   part is substituted through its table and XORed with the other, the
   two pieces of K (K >> 9 and K & 0x1FF) are folded in, and a second
   substitution pass follows.  The return value is the seven-bit-side
   result shifted left by nine, ORed with the nine-bit-side result, and
   narrowed to unsigned short.  */
unsigned short
FI (unsigned short I, unsigned short K)
{
  const unsigned short key_high = K >> 9;
  const unsigned short key_low9 = K & 0x1FF;
  unsigned short nine = I >> 7;
  /* Held in an unsigned char: every assignment below truncates to 8 bits.  */
  unsigned char seven = I & 0x7F;

  /* First substitution pass.  */
  nine = KASUMI_SBOX_S9[nine] ^ seven;
  seven = KASUMI_SBOX_S7[seven] ^ (nine & 0x7F);

  /* Fold in the key, then run the second substitution pass.  */
  seven ^= key_high;
  nine = KASUMI_SBOX_S9[nine ^ key_low9] ^ seven;
  seven = KASUMI_SBOX_S7[seven] ^ (nine & 0x7F);

  return (seven << 9) | nine;
}



/* Process BLOCKS consecutive 8-byte blocks, reading from IN and writing
   to OUT.

   EK points to a pointer to the key words; every block reads
   (*EK)[0] .. (*EK)[63] (four groups of sixteen words).
   IN and OUT are advanced by 8 bytes after each block.

   The noinline and noclone attributes are GCC extensions that keep this
   function as a single out-of-line copy.  */
__attribute__((noinline, noclone))

void

encrypt_n (unsigned short **EK, const unsigned char in[], unsigned char out[],

       unsigned long blocks)

{

  unsigned long i, j;

  for (i = 0; i != blocks; ++i)

    {

      /* Four working words, each loaded from one of in[0] .. in[3].  */
      unsigned short B0 = load_be (in, 0);

      unsigned short B1 = load_be (in, 1);

      unsigned short B2 = load_be (in, 2);

      unsigned short B3 = load_be (in, 3);

      /* j takes the values 0, 2, 4, 6, so K starts at key word 0, 16, 32
         and 48 in turn and K[0] .. K[15] are all used below.  */
      for (j = 0; j != 8; j += 2)

    {

      const unsigned short *K = &(*EK)[8 * j];

      /* First half: derive R and L from B0/B1 and K[0], K[1], then run
         three FI steps keyed by K[2] .. K[7].  */
      unsigned short R = B1 ^ (rotate_left (B0, 1) & K[0]);

      unsigned short L = B0 ^ (rotate_left (R, 1) | K[1]);

      L = FI (L ^ K[2], K[3]) ^ R;

      R = FI (R ^ K[4], K[5]) ^ L;

      L = FI (L ^ K[6], K[7]) ^ R;

      /* Fold R and L into B2 and B3; R and L continue with the updated
         B2 and B3 values.  */
      R = B2 ^= R;

      L = B3 ^= L;

      /* Second half: three FI steps keyed by K[10] .. K[15], then the
         rotate/mask step with K[8] and K[9].  */
      R = FI (R ^ K[10], K[11]) ^ L;

      L = FI (L ^ K[12], K[13]) ^ R;

      R = FI (R ^ K[14], K[15]) ^ L;

      R ^= (rotate_left (L, 1) & K[8]);

      L ^= (rotate_left (R, 1) | K[9]);

      B0 ^= L;

      B1 ^= R;

    }

      /* Emit the four working words and step to the next block.  */
      store_be4 (out, B0, B1, B2, B3);

      in += 8;

      out += 8;

    }

}



/* Input and output buffers that main passes to encrypt_n.  They have no
   initializer, so they start out zero-filled.  */
unsigned char in[4096], out[4096];



/* Benchmark driver: builds an all-zero 64-word key and calls encrypt_n
   100000 times over the 4096-byte IN/OUT buffers (4096 / 8 = 512 blocks
   per call).  Always returns 0.  */
int

main ()

{

  unsigned short EKb[64], *EK = EKb;

  __builtin_memset (EKb, 0, sizeof EKb);

  /* Empty asm with a "memory" clobber: a compiler-level barrier.
     NOTE(review): presumably here so the compiler cannot carry the known
     zero contents of EKb across this point -- confirm.  */
  asm volatile ("" : : : "memory");

  int i;

  for (i = 0; i < 100000; i++)

    encrypt_n (&EK, in, out, 4096 / 8);

  return 0;

}



Compiled with -O3 -finline-functions -D_REENTRANT -Wno-long-long -W -Wall -fPIC

-fvisibility=hidden, trunk gcc gives:

$ time ./a.out 

real    0m15.727s

user    0m15.639s

sys    0m0.004s



while with trunk clang:

$ time ./a.out 

real    0m7.864s

user    0m7.796s

sys    0m0.005s

Reply via email to