https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89317
Bug ID: 89317 Summary: Ineffective code from std::copy Product: gcc Version: 9.0 Status: UNCONFIRMED Severity: normal Priority: P3 Component: c++ Assignee: unassigned at gcc dot gnu.org Reporter: bugzi...@poradnik-webmastera.com Target Milestone: --- gcc produces inefficient code when std::copy is used to copy data. As a test, I created my own version of std::copy, and this version is optimized properly. Compiled using g++ (GCC-Explorer-Build) 9.0.1 20190211 (experimental). Options: -O3 -std=c++11 -march=skylake [code] #include <stdint.h> #include <algorithm> #define Size 8 class Test { public: void test1(void*__restrict ptr); void test2(void*__restrict ptr); private: int16_t data1[Size]; int16_t data2[Size]; }; template<typename T1, typename T2> void mycopy(T1 begin, T1 end, T2 dest) { while (begin != end) { *dest = *begin; ++dest; ++begin; } } void Test::test1(void*__restrict ptr) { uint16_t* p = (uint16_t*)ptr; std::copy(data1, data1 + Size, p); p += Size; std::copy(data2, data2 + Size, p); } void Test::test2(void*__restrict ptr) { int16_t* p = (int16_t*)ptr; mycopy(data1, data1 + Size, p); p += Size; mycopy(data2, data2 + Size, p); } [/code] [asm] Test::test1(void*): movzx eax, WORD PTR [rdi] mov edx, 16 mov WORD PTR [rsi], ax movzx eax, WORD PTR [rdi+2] add rsi, 16 mov WORD PTR [rsi-14], ax movzx eax, WORD PTR [rdi+4] mov WORD PTR [rsi-12], ax movzx eax, WORD PTR [rdi+6] mov WORD PTR [rsi-10], ax movzx eax, WORD PTR [rdi+8] mov WORD PTR [rsi-8], ax movzx eax, WORD PTR [rdi+10] mov WORD PTR [rsi-6], ax movzx eax, WORD PTR [rdi+12] mov WORD PTR [rsi-4], ax movzx eax, WORD PTR [rdi+14] mov WORD PTR [rsi-2], ax mov rax, rdx sar rax test rdx, rdx jle .L69 movzx edx, WORD PTR [rdi+16] mov WORD PTR [rsi], dx cmp rax, 1 je .L69 movzx edx, WORD PTR [rdi+18] mov WORD PTR [rsi+2], dx cmp rax, 2 je .L69 movzx edx, WORD PTR [rdi+20] mov WORD PTR [rsi+4], dx cmp rax, 3 je .L69 movzx edx, WORD PTR [rdi+22] mov WORD PTR [rsi+6], dx cmp rax, 4 je .L69 movzx edx, WORD PTR [rdi+24] 
mov WORD PTR [rsi+8], dx cmp rax, 5 je .L69 movzx edx, WORD PTR [rdi+26] mov WORD PTR [rsi+10], dx cmp rax, 6 je .L69 movzx edx, WORD PTR [rdi+28] mov WORD PTR [rsi+12], dx cmp rax, 7 je .L69 movzx edx, WORD PTR [rdi+30] mov WORD PTR [rsi+14], dx cmp rax, 8 je .L69 movzx edx, WORD PTR [rdi+32] mov WORD PTR [rsi+16], dx cmp rax, 9 je .L69 movzx edx, WORD PTR [rdi+34] mov WORD PTR [rsi+18], dx cmp rax, 10 je .L69 movzx edx, WORD PTR [rdi+36] mov WORD PTR [rsi+20], dx cmp rax, 11 je .L69 movzx edx, WORD PTR [rdi+38] mov WORD PTR [rsi+22], dx cmp rax, 12 je .L69 movzx edx, WORD PTR [rdi+40] mov WORD PTR [rsi+24], dx cmp rax, 13 je .L69 movzx edx, WORD PTR [rdi+42] mov WORD PTR [rsi+26], dx cmp rax, 14 je .L69 movzx eax, WORD PTR [rdi+44] mov WORD PTR [rsi+28], ax .L69: ret Test::test2(void*): vmovdqu xmm0, XMMWORD PTR [rdi] vmovups XMMWORD PTR [rsi], xmm0 vmovdqu xmm1, XMMWORD PTR [rdi+16] vmovups XMMWORD PTR [rsi+16], xmm1 ret [/asm]