https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80293
Bug ID: 80293
Summary: g++ 5.4 -> 6.1 regression: unnecessary code at -O2 (-O1 is fine)
Product: gcc
Version: 6.2.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: c++
Assignee: unassigned at gcc dot gnu.org
Reporter: alex at weej dot com
Target Milestone: ---

Created attachment 41110
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=41110&action=edit
repro.cpp

Apologies for the poor summary, I really can't think of a good way to describe this.

I'm using a couple of functions to pass a value through a `std::array` representation of the variable's memory (similar to type-punning through `std::memcpy`), but even at -O2 the compiler is emitting a lot of unnecessary garbage.

---
// g++ repro.cpp -c -o repro.o -std=gnu++11 -O2 -save-temps

#include <type_traits>
#include <array>
#include <cstring>
#include <cstdint>

// Return a copy of the underlying memory of an arbitrary value.
template <
    typename T,
    typename = typename std::enable_if<std::is_trivially_copyable<T>::value>::type
>
auto getMem(
    T const & value
) -> std::array<char, sizeof(T)> {
    auto ret = std::array<char, sizeof(T)>{};
    std::memcpy(ret.data(), &value, sizeof(T));
    return ret;
}

template <
    typename T,
    typename = typename std::enable_if<std::is_trivially_copyable<T>::value>::type
>
auto fromMem(
    std::array<char, sizeof(T)> const & buf
) -> T {
    auto ret = T{};
    std::memcpy(&ret, buf.data(), sizeof(T));
    return ret;
}

double foo1(std::uint64_t arg) {
    return fromMem<double>(getMem(arg));
}

double foo2(std::uint64_t arg) {
    return *reinterpret_cast<double*>(&arg);
}

double foo3(std::uint64_t arg) {
    double ret;
    std::memcpy(&ret, &arg, sizeof(arg));
    return ret;
}
---

In GCC 5.4 and older, as well as all versions of clang that I tested, all three `foo*` functions emit identical and extremely short code. But in GCC 6.1 and newer, `foo1` emits a load of extra instructions *only when -O2 or above is used*.
--- Disassembly of section .text: 0000000000000000 <_Z4foo1m>: 0: 48 83 ec 28 sub $0x28,%rsp 4: 64 48 8b 04 25 28 00 mov %fs:0x28,%rax b: 00 00 d: 48 89 44 24 18 mov %rax,0x18(%rsp) 12: 31 c0 xor %eax,%eax 14: 89 f8 mov %edi,%eax 16: 66 c1 e8 08 shr $0x8,%ax 1a: 89 c1 mov %eax,%ecx 1c: 89 f8 mov %edi,%eax 1e: c1 e8 18 shr $0x18,%eax 21: 89 c2 mov %eax,%edx 23: 31 c0 xor %eax,%eax 25: 40 88 f8 mov %dil,%al 28: 0f b6 d2 movzbl %dl,%edx 2b: 88 cc mov %cl,%ah 2d: 48 89 f9 mov %rdi,%rcx 30: 48 c1 e2 18 shl $0x18,%rdx 34: 81 e1 00 00 ff 00 and $0xff0000,%ecx 3a: 48 25 ff ff 00 ff and $0xffffffffff00ffff,%rax 40: 48 09 c8 or %rcx,%rax 43: 48 b9 ff ff ff 00 ff movabs $0xffffffff00ffffff,%rcx 4a: ff ff ff 4d: 48 21 c8 and %rcx,%rax 50: 48 b9 ff ff ff ff 00 movabs $0xffffff00ffffffff,%rcx 57: ff ff ff 5a: 48 09 d0 or %rdx,%rax 5d: 48 ba 00 00 00 00 ff movabs $0xff00000000,%rdx 64: 00 00 00 67: 48 21 fa and %rdi,%rdx 6a: 48 21 c8 and %rcx,%rax 6d: 48 b9 ff ff ff ff ff movabs $0xffff00ffffffffff,%rcx 74: 00 ff ff 77: 48 09 d0 or %rdx,%rax 7a: 48 ba 00 00 00 00 00 movabs $0xff0000000000,%rdx 81: ff 00 00 84: 48 21 fa and %rdi,%rdx 87: 48 21 c8 and %rcx,%rax 8a: 48 b9 ff ff ff ff ff movabs $0xff00ffffffffffff,%rcx 91: ff 00 ff 94: 48 09 d0 or %rdx,%rax 97: 48 ba 00 00 00 00 00 movabs $0xff000000000000,%rdx 9e: 00 ff 00 a1: 48 21 fa and %rdi,%rdx a4: 48 c1 ef 38 shr $0x38,%rdi a8: 48 21 c8 and %rcx,%rax ab: 48 c1 e7 38 shl $0x38,%rdi af: 48 09 d0 or %rdx,%rax b2: 48 89 fa mov %rdi,%rdx b5: 48 bf ff ff ff ff ff movabs $0xffffffffffffff,%rdi bc: ff ff 00 bf: 48 21 c7 and %rax,%rdi c2: 48 09 d7 or %rdx,%rdi c5: 48 8b 44 24 18 mov 0x18(%rsp),%rax ca: 64 48 33 04 25 28 00 xor %fs:0x28,%rax d1: 00 00 d3: 48 89 7c 24 08 mov %rdi,0x8(%rsp) d8: f2 0f 10 44 24 08 movsd 0x8(%rsp),%xmm0 de: 75 05 jne e5 <_Z4foo1m+0xe5> e0: 48 83 c4 28 add $0x28,%rsp e4: c3 retq e5: e8 00 00 00 00 callq ea <_Z4foo1m+0xea> ea: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1) 00000000000000f0 <_Z4foo2m>: f0: 48 89 7c 24 
f8 mov %rdi,-0x8(%rsp) f5: f2 0f 10 44 24 f8 movsd -0x8(%rsp),%xmm0 fb: c3 retq fc: 0f 1f 40 00 nopl 0x0(%rax) 0000000000000100 <_Z4foo3m>: 100: 48 89 7c 24 f8 mov %rdi,-0x8(%rsp) 105: f2 0f 10 44 24 f8 movsd -0x8(%rsp),%xmm0 10b: c3 retq --- Tested this on g++ (Ubuntu 6.2.0-5ubuntu12) 6.2.0 20161005 x86-64. Also verified many versions with godbolt.org.