Issue |
149298
|
Summary |
clang missed vectorization optimization
|
Labels |
clang
|
Assignees |
|
Reporter |
rockeet
|
```c++
#include <string.h>
#include <utility>
// Number of elements in the array `a` (must be a true array, not a pointer).
// Both the argument and the whole expansion are parenthesized so the macro
// composes correctly inside larger expressions such as `n / extent(a)`.
#define extent(a) (sizeof(a)/sizeof((a)[0]))
// Test payload for the swap functions: an array of 12 ints, with the struct
// over-aligned to 16 bytes so every B object starts on a 16-byte boundary.
struct alignas(16) B {
int a[12];
};
// Swaps *x and *y as whole objects: three sizeof(B)-byte memcpy calls routed
// through a local temporary.
void B_swap1(B* x, B* y) {
B t;
// t = *x
memcpy(&t, x, sizeof(B));
// *x = *y
memcpy( x, y, sizeof(B));
// *y = saved copy of the original *x
memcpy( y, &t, sizeof(B));
}
// Swaps *x and *y element by element: one std::swap per int of the array
// member. The loop bound is the element count of B::a (12), computed by the
// extent() macro at compile time.
void B_swap2(B* x, B* y) {
for (int i = 0; i < extent(x->a); i++) {
std::swap(x->a[i], y->a[i]);
}
}
```
### clang generates (-O3)
```nasm
; clang -O3 output for B_swap1. rdi = x, rsi = y (SysV AMD64 argument order).
; All accesses are aligned 16-byte moves (movaps); the 48-byte temporary `t`
; is materialized in memory below rsp rather than kept in registers.
B_swap1(B*, B*):
; Load all 48 bytes of *x.
movaps xmm0, xmmword ptr [rdi]
movaps xmm1, xmmword ptr [rdi + 16]
movaps xmm2, xmmword ptr [rdi + 32]
; Spill them to t at [rsp-56 .. rsp-9]; rsp is not adjusted, so this uses
; the area below the stack pointer (red zone of a leaf function).
movaps xmmword ptr [rsp - 24], xmm2
movaps xmmword ptr [rsp - 40], xmm1
movaps xmmword ptr [rsp - 56], xmm0
; *x = *y
movaps xmm0, xmmword ptr [rsi]
movaps xmm1, xmmword ptr [rsi + 16]
movaps xmm2, xmmword ptr [rsi + 32]
movaps xmmword ptr [rdi + 32], xmm2
movaps xmmword ptr [rdi + 16], xmm1
movaps xmmword ptr [rdi], xmm0
; Reload t from the stack and store it to *y.
movaps xmm0, xmmword ptr [rsp - 56]
movaps xmm1, xmmword ptr [rsp - 40]
movaps xmm2, xmmword ptr [rsp - 24]
movaps xmmword ptr [rsi + 32], xmm2
movaps xmmword ptr [rsi + 16], xmm1
movaps xmmword ptr [rsi], xmm0
ret
; clang -O3 output for B_swap2. rdi = x, rsi = y (SysV AMD64 argument order).
; The 12-iteration loop is fully unrolled but NOT vectorized: each element is
; swapped with four scalar 32-bit moves (2 loads + 2 stores), 48 moves total.
B_swap2(B*, B*):
; Element 0: eax = x->a[0], ecx = y->a[0], then store them crosswise.
mov eax, dword ptr [rdi]
mov ecx, dword ptr [rsi]
mov dword ptr [rdi], ecx
mov dword ptr [rsi], eax
; Elements 1..11 repeat the same pattern at byte offsets 4, 8, ..., 44.
mov eax, dword ptr [rdi + 4]
mov ecx, dword ptr [rsi + 4]
mov dword ptr [rdi + 4], ecx
mov dword ptr [rsi + 4], eax
mov eax, dword ptr [rdi + 8]
mov ecx, dword ptr [rsi + 8]
mov dword ptr [rdi + 8], ecx
mov dword ptr [rsi + 8], eax
mov eax, dword ptr [rdi + 12]
mov ecx, dword ptr [rsi + 12]
mov dword ptr [rdi + 12], ecx
mov dword ptr [rsi + 12], eax
mov eax, dword ptr [rdi + 16]
mov ecx, dword ptr [rsi + 16]
mov dword ptr [rdi + 16], ecx
mov dword ptr [rsi + 16], eax
mov eax, dword ptr [rdi + 20]
mov ecx, dword ptr [rsi + 20]
mov dword ptr [rdi + 20], ecx
mov dword ptr [rsi + 20], eax
mov eax, dword ptr [rdi + 24]
mov ecx, dword ptr [rsi + 24]
mov dword ptr [rdi + 24], ecx
mov dword ptr [rsi + 24], eax
mov eax, dword ptr [rdi + 28]
mov ecx, dword ptr [rsi + 28]
mov dword ptr [rdi + 28], ecx
mov dword ptr [rsi + 28], eax
mov eax, dword ptr [rdi + 32]
mov ecx, dword ptr [rsi + 32]
mov dword ptr [rdi + 32], ecx
mov dword ptr [rsi + 32], eax
mov eax, dword ptr [rdi + 36]
mov ecx, dword ptr [rsi + 36]
mov dword ptr [rdi + 36], ecx
mov dword ptr [rsi + 36], eax
mov eax, dword ptr [rdi + 40]
mov ecx, dword ptr [rsi + 40]
mov dword ptr [rdi + 40], ecx
mov dword ptr [rsi + 40], eax
mov eax, dword ptr [rdi + 44]
mov ecx, dword ptr [rsi + 44]
mov dword ptr [rdi + 44], ecx
mov dword ptr [rsi + 44], eax
; NOTE(review): the listing as pasted ends here without a `ret`; presumably
; the final instruction was dropped when the output was copied - confirm.
```
### g++ generates (-O3)
```nasm
; g++ -O3 output for B_swap1. rdi = x, rsi = y (SysV AMD64 argument order).
; No stack temporary: the original *x is held in xmm2/xmm1/xmm0 while xmm3
; shuttles *y into *x 16 bytes at a time. All moves are the unaligned forms
; (movdqu loads, movups stores), i.e. the 16-byte alignment of B is not used.
B_swap1(B*, B*):
; xmm3 = first 16 bytes of *y
movdqu xmm3, XMMWORD PTR [rsi]
; Save all 48 bytes of *x before any of it is overwritten.
movdqu xmm2, XMMWORD PTR [rdi]
movdqu xmm1, XMMWORD PTR [rdi+16]
movdqu xmm0, XMMWORD PTR [rdi+32]
; *x = *y, one 16-byte chunk at a time through xmm3.
movups XMMWORD PTR [rdi], xmm3
movdqu xmm3, XMMWORD PTR [rsi+16]
movups XMMWORD PTR [rdi+16], xmm3
movdqu xmm3, XMMWORD PTR [rsi+32]
movups XMMWORD PTR [rdi+32], xmm3
; *y = saved original *x
movups XMMWORD PTR [rsi], xmm2
movups XMMWORD PTR [rsi+16], xmm1
movups XMMWORD PTR [rsi+32], xmm0
ret
; g++ -O3 output for B_swap2. rdi = x, rsi = y (SysV AMD64 argument order).
; The element-wise loop is vectorized: three 16-byte swaps (four ints each)
; using the aligned forms (movdqa loads, movaps stores). xmm0 carries data
; from *x to *y, xmm1 carries data from *y to *x; the three steps are
; interleaved rather than emitted back to back.
B_swap2(B*, B*):
; Bytes 0..15: load both sides, then store crosswise.
movdqa xmm0, XMMWORD PTR [rdi]
movdqa xmm1, XMMWORD PTR [rsi]
movaps XMMWORD PTR [rdi], xmm1
; Bytes 16..31 (the store of the previous x chunk to y is interleaved here).
movdqa xmm1, XMMWORD PTR [rsi+16]
movaps XMMWORD PTR [rsi], xmm0
movdqa xmm0, XMMWORD PTR [rdi+16]
movaps XMMWORD PTR [rdi+16], xmm1
; Bytes 32..47.
movdqa xmm1, XMMWORD PTR [rsi+32]
movaps XMMWORD PTR [rsi+16], xmm0
movdqa xmm0, XMMWORD PTR [rdi+32]
movaps XMMWORD PTR [rdi+32], xmm1
movaps XMMWORD PTR [rsi+32], xmm0
ret
```
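Although g++ does not take advantage of the `alignas(16)` for the `memcpy` calls in `B_swap1` (it emits unaligned `movdqu`/`movups` instead of aligned moves), its output is still far better than clang's.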
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs