On Fri, Nov 17, 2006 at 04:30:53PM -0700, Shaun Jackman wrote:
> Is there anything I can do to help GCC along here? I'm using GCC 4.1.0
> with -O2.
>
> I won't bother to show bswap_32 here, which produces a real disaster!
> Think 47 instructions, for what should be 6.
/* You may get better code if you write it something like this: */

#include <stdint.h>   /* uint16_t, uint32_t */
#include <string.h>   /* memcpy */

/*
 * Return x with its four bytes in reverse order.
 *
 * The value is copied into a byte array, the outer pair and the inner
 * pair of bytes are exchanged, and the array is copied back.  Reversing
 * the object representation gives the same result on either byte order.
 */
uint32_t
bswap_32 (uint32_t x)
{
  unsigned char c[4], temp;

  memcpy (c, &x, 4);

  /* Exchange the outer bytes.  */
  temp = c[0];
  c[0] = c[3];
  c[3] = temp;

  /* Exchange the inner bytes.  */
  temp = c[1];
  c[1] = c[2];
  c[2] = temp;

  memcpy (&x, c, 4);
  return x;
}

/*
 * It isn't only on the AVR that bswap_32() is nontrivial to get right.
 * These two versions would rule on the i386 if GCC would be just a little
 * bit smarter:
 */

/*
 * Exchange the two low-order bytes of x.
 *
 * NOTE: x is evaluated twice, so the argument must be free of side
 * effects.  The result has type int after integer promotion; callers
 * below store it back into a uint16_t.
 */
#define BSWAP_16(x) \
  ((((x) >> 8) & 0xff) | (((x) & 0xff) << 8))

/*
 * Return y with its four bytes in reverse order, built from two 16-bit
 * byte swaps and one 16-bit rotate:
 *
 *   1. swap the bytes of the first 16-bit half in memory,
 *   2. exchange the two 16-bit halves of the 32-bit value,
 *   3. swap the bytes of the (new) first 16-bit half in memory.
 *
 * Every 16-bit half passes through d[0] exactly once, so the result does
 * not depend on whether d[0] aliases the low or the high half of x.
 * memcpy is used to move between the uint32_t and uint16_t views.
 */
uint32_t
bswap_32_a (uint32_t y)
{
  uint16_t d[2];
  uint32_t x = y;

  memcpy (d, &x, sizeof (d));
  d[0] = BSWAP_16 (d[0]);
  memcpy (&x, d, sizeof (x));

  /* Rotate by 16 bits: exchange the upper and lower halves.  */
  x = ((x >> 16) & 0xffff) | ((x & 0xffff) << 16);

  memcpy (d, &x, sizeof (d));
  d[0] = BSWAP_16 (d[0]);
  memcpy (&x, d, sizeof (x));

  return x;
}

/*
 * Assembly listing for bswap_32_a as given in the original message
 * (presumably GCC output for i386 -- the message does not state the
 * exact compiler version or flags):

bswap_32_a:
	subl	$16, %esp
	movl	20(%esp), %eax
	movl	%eax, 12(%esp)
	rolw	$8, 12(%esp)
	roll	$16, 12(%esp)
	rolw	$8, 12(%esp)
	movl	12(%esp), %eax
	addl	$16, %esp
	ret
*/

/*
 * Same algorithm as bswap_32_a, but the 16-bit and 32-bit views share
 * storage through a union instead of being copied with memcpy.
 *
 * NOTE(review): this reads a union member other than the one most
 * recently written; confirm that the target compiler supports union
 * type punning (GCC documents that it does).
 */
uint32_t
bswap_32_b (uint32_t y)
{
  union
  {
    uint16_t d[2];
    uint32_t x;
  } t;

  t.x = y;
  t.d[0] = BSWAP_16 (t.d[0]);

  /* Rotate by 16 bits: exchange the upper and lower halves.  */
  t.x = ((t.x >> 16) & 0xffff) | ((t.x & 0xffff) << 16);

  t.d[0] = BSWAP_16 (t.d[0]);
  return t.x;
}

/*
 * Assembly listing for bswap_32_b as given in the original message
 * (presumably GCC output for i386 -- the message does not state the
 * exact compiler version or flags):

bswap_32_b:
	movl	4(%esp), %edx
	movl	%edx, %eax
	rolw	$8, %ax
	movw	%ax, %dx
	movl	%edx, %eax
	roll	$16, %eax
	movl	%eax, %edx
	rolw	$8, %ax
	movw	%ax, %dx
	movl	%edx, %eax
	ret
*/

/* -- Rask Ingemann Lambertsen */