| Issue |
170039
|
| Summary |
Inefficient AVX512 code for simple count zero-bytes loop.
|
| Labels |
|
| Assignees |
|
| Reporter |
the8472
|
https://rust.godbolt.org/z/TY7hT19oc
```rust
/// Counts how many of the 32 bytes in `bar` are zero and returns that count
/// as a `u16`.
///
/// `count()` yields a `usize`; the `u16::try_from(..).unwrap()` conversion
/// would panic if the count did not fit in a `u16`. The input is a fixed
/// `[u8; 32]`, so the count is at most 32 and the conversion cannot fail.
///
/// `#[inline(never)]` asks the compiler not to inline this function
/// (presumably so it is emitted as a standalone symbol for the listings).
#[inline(never)]
pub fn foo(bar: &[u8; 32]) -> u16 {
// Keep only the elements equal to 0, count them, then narrow usize -> u16.
u16::try_from(bar.iter().filter(|v| **v == 0).count()).unwrap()
}
```
<details><summary>znver5</summary>
```assembly
; foo as emitted for znver5 (Intel operand order: dst, src).
; In:  rdi = address of the 32 input bytes (the only address loaded from).
; Out: eax = number of input bytes that are zero.
;
; Shape of the code: one 32-byte load and one vptestnmb build a 32-bit mask in
; k0 (bit i set iff byte i is zero). The mask is then counted one bit at a
; time: for each i, k0 is shifted right by i into k1, moved to a GPR, masked
; with 1, and the 32 resulting 0/1 values are summed with a chain of adds.
; No popcnt instruction is used. rbx, rbp and r12-r15 are pushed on entry and
; popped before returning, and three values are spilled to [rsp - 4/8/12]
; without adjusting rsp (presumably relying on the SysV red zone; this
; function makes no calls).
; "bN" in the comments below means bit N of the mask, as a 0/1 value.
example[f5d875b554c12ca0]::foo:
; Save the six registers used below as additional scratch.
push rbp
push r15
push r14
push r13
push r12
push rbx
vmovdqu ymm0, ymmword ptr [rdi] ; ymm0 = the 32 input bytes (unaligned load)
vptestnmb k0, ymm0, ymm0 ; k0 bit i = 1 iff (byte i AND byte i) == 0, i.e. byte i == 0
; From here on: extract each mask bit into its own register and sum them.
kshiftrd k1, k0, 1
kmovd r9d, k0 ; r9d = whole mask; masked to b0 below
kmovd ecx, k1
kshiftrd k1, k0, 2
and r9d, 1
kmovd r13d, k1
kshiftrd k1, k0, 3
and ecx, 1
kmovd esi, k1
kshiftrd k1, k0, 4
add ecx, r9d ; ecx = b0 + b1
and r13d, 1
kmovd ebx, k1
kshiftrd k1, k0, 5
and esi, 1
kmovd r15d, k1
kshiftrd k1, k0, 6
and ebx, 1
add esi, r13d ; esi = b2 + b3
kmovd edx, k1
kshiftrd k1, k0, 7
and r15d, 1
add esi, ecx ; esi = b0..b3
kmovd ebp, k1
kshiftrd k1, k0, 8
add r15d, ebx ; r15d = b4 + b5
and edx, 1
kmovd r12d, k1
kshiftrd k1, k0, 9
add edx, r15d ; edx = b4..b6
and ebp, 1
kmovd r14d, k1
kshiftrd k1, k0, 10
add edx, esi ; edx = b0..b6
and r12d, 1
kmovd eax, k1
kshiftrd k1, k0, 11
add r12d, ebp ; r12d = b7 + b8
and r14d, 1
kmovd edi, k1
kshiftrd k1, k0, 12
add r14d, r12d ; r14d = b7..b9
and eax, 1
kmovd r11d, k1
kshiftrd k1, k0, 13
mov dword ptr [rsp - 8], edi ; spill (mask >> 11), not yet masked to one bit
add eax, r14d ; eax = b7..b10
kmovd r8d, k1
kshiftrd k1, k0, 14
and r11d, 1
add eax, edx ; eax = b0..b10
kmovd edi, k1
kshiftrd k1, k0, 15
and r8d, 1
kmovd r9d, k1
kshiftrd k1, k0, 16
and edi, 1
kmovd r10d, k1
kshiftrd k1, k0, 17
and r9d, 1
mov dword ptr [rsp - 4], r10d ; spill (mask >> 16), not yet masked to one bit
kmovd r10d, k1
kshiftrd k1, k0, 18
kmovd ebx, k1
kshiftrd k1, k0, 19
and r10d, 1
kmovd r15d, k1
kshiftrd k1, k0, 20
and ebx, 1
kmovd r13d, k1
kshiftrd k1, k0, 21
and r15d, 1
kmovd esi, k1
kshiftrd k1, k0, 22
and r13d, 1
kmovd ecx, k1
kshiftrd k1, k0, 23
and esi, 1
mov dword ptr [rsp - 12], ecx ; spill (mask >> 22), not yet masked to one bit
mov ecx, dword ptr [rsp - 8] ; reload (mask >> 11)
kmovd r14d, k1
kshiftrd k1, k0, 24
kmovd edx, k1
kshiftrd k1, k0, 25
and r14d, 1
kmovd ebp, k1
kshiftrd k1, k0, 26
and edx, 1
and ebp, 1
and ecx, 1 ; ecx = b11
add r11d, ecx ; r11d = b11 + b12
add r8d, r11d ; r8d = b11..b13
kmovd r11d, k1
kshiftrd k1, k0, 27
add edi, r8d ; edi = b11..b14
kmovd r8d, k1
kshiftrd k1, k0, 28
and r11d, 1
add r9d, edi ; r9d = b11..b15
kmovd edi, k1
kshiftrd k1, k0, 29
and r8d, 1
add r9d, eax ; r9d = b0..b15
mov eax, dword ptr [rsp - 4] ; reload (mask >> 16)
kmovd r12d, k1
kshiftrd k1, k0, 30
kshiftrd k0, k0, 31 ; last use of the full mask: k0 now holds only b31
and edi, 1
kmovd ecx, k1
and r12d, 1
and ecx, 1
add ecx, r12d ; ecx = b29 + b30
and eax, 1 ; eax = b16
add r10d, eax ; r10d = b16 + b17
kmovd eax, k0
add ebx, r10d ; ebx = b16..b18
mov r10d, dword ptr [rsp - 12] ; reload (mask >> 22)
and eax, 1 ; eax = b31
add r15d, ebx ; r15d = b16..b19
add eax, ecx ; eax = b29..b31
add r13d, r15d ; r13d = b16..b20
add esi, r13d ; esi = b16..b21
add esi, r9d ; esi = b0..b21
and r10d, 1 ; r10d = b22
add r14d, r10d ; r14d = b22 + b23
add edx, r14d ; edx = b22..b24
add ebp, edx ; ebp = b22..b25
add r11d, ebp ; r11d = b22..b26
add r8d, r11d ; r8d = b22..b27
add edi, r8d ; edi = b22..b28
add edi, esi ; edi = b0..b28
add eax, edi ; eax = b0..b31 = number of zero bytes (return value)
; Restore the saved registers in reverse push order.
pop rbx
pop r12
pop r13
pop r14
pop r15
pop rbp
vzeroupper ; clear upper YMM state before returning to the caller
ret
```
</details>
<details><summary>znver2</summary>
```assembly
; foo as emitted for znver2 (Intel operand order: dst, src).
; In:  rdi = address of the 32 input bytes.
; Out: eax = number of input bytes that are zero.
; Compare all 32 bytes against zero, collapse the result to a 32-bit mask,
; and count the set bits with a single popcnt.
example[f5d875b554c12ca0]::foo:
vpxor xmm0, xmm0, xmm0 ; zero vector to compare against
vpcmpeqb ymm0, ymm0, ymmword ptr [rdi] ; each result byte = 0xFF where the input byte == 0, else 0x00
vpmovmskb eax, ymm0 ; eax bit i = top bit of result byte i
popcnt eax, eax ; eax = number of set mask bits = number of zero bytes
vzeroupper ; clear upper YMM state before returning to the caller
ret
```
</details>
_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs