http://gcc.gnu.org/bugzilla/show_bug.cgi?id=15419
Steven Bosscher <steven at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
   Last reconfirmed|2009-04-22 23:01:37         |2011-05-22 17:12:37

--- Comment #3 from Steven Bosscher <steven at gcc dot gnu.org> 2011-05-22 15:13:52 UTC ---

$ cat t.c
typedef long size_t;
extern void *memcpy (void *__restrict __dest,
                     __const void *__restrict __src, size_t __n)
  __attribute__ ((__nothrow__)) __attribute__ ((__nonnull__ (1, 2)));
typedef unsigned uint32_t;

uint32_t
get_uint32(const void *p)
{
  uint32_t w;
  memcpy(&w, p, sizeof (uint32_t));
  return w;
}

uint32_t
get_uint32a(const void *p)
{
  const struct { uint32_t w; } __attribute__((packed)) *wp = p;
  return wp->w;
}

$ ./cc1 -quiet -m32 -O2 t.c -fdump-tree-optimized
$ cat t.s
        .file   "t.c"
        .text
        .p2align 4,,15
        .globl  get_uint32
        .type   get_uint32, @function
get_uint32:
.LFB0:
        .cfi_startproc
        subl    $16, %esp
        .cfi_def_cfa_offset 20
        movl    20(%esp), %eax
        movl    (%eax), %eax
        addl    $16, %esp
        .cfi_def_cfa_offset 4
        ret
        .cfi_endproc
.LFE0:
        .size   get_uint32, .-get_uint32
        .p2align 4,,15
        .globl  get_uint32a
        .type   get_uint32a, @function
get_uint32a:
.LFB1:
        .cfi_startproc
        movl    4(%esp), %eax
        movl    (%eax), %eax
        ret
        .cfi_endproc
.LFE1:
        .size   get_uint32a, .-get_uint32a
        .ident  "GCC: (GNU) 4.6.0 20110312 (experimental) [trunk revision 170907]"
        .section        .note.GNU-stack,"",@progbits

$ cat t.c.143t.optimized

;; Function get_uint32 (get_uint32)

get_uint32 (const void * p)
{
  uint32_t w;
  uint32_t D.1997;

<bb 2>:
  memcpy (&w, p_2(D), 4);
  D.1997_3 = w;
  return D.1997_3;

}


;; Function get_uint32a (get_uint32a)

get_uint32a (const void * p)
{
  uint32_t D.1994;

<bb 2>:
  D.1994_3 = MEM[(const struct  *)p_1(D)].w;
  return D.1994_3;

}

Isn't it possible to fold memcpy for selected small lengths to explicit memory stores? The code for x86_64 is identical for both functions.