| Issue |
163788
|
| Summary |
[X86] Failure to use 128/256/512-bit vector types for very large integer logic ops
|
| Labels |
backend:X86,
llvm:SelectionDAG
|
| Assignees |
|
| Reporter |
RKSimon
|
Noticed when playing with _BitInt() for very large integers
https://zig.godbolt.org/z/x1qMfc3G7
```ll
define void @and_i512_mem(ptr %p0, ptr %p1, ptr %p2) {
%a0 = load i512, ptr %p0
%a1 = load i512, ptr %p1
%res = and i512 %a0, %a1
store i512 %res, ptr %p2
ret void
}
```
```asm
and_i512_mem: # @and_i512_mem
pushq %rbx
movq 48(%rdi), %rax
movq 56(%rdi), %rcx
movq 32(%rdi), %r8
movq 40(%rdi), %r9
movq 16(%rdi), %r10
movq 24(%rdi), %r11
movq (%rdi), %rbx
movq 8(%rdi), %rdi
andq 8(%rsi), %rdi
andq (%rsi), %rbx
andq 24(%rsi), %r11
andq 16(%rsi), %r10
andq 40(%rsi), %r9
andq 32(%rsi), %r8
andq 56(%rsi), %rcx
andq 48(%rsi), %rax
movq %rax, 48(%rdx)
movq %rcx, 56(%rdx)
movq %r8, 32(%rdx)
movq %r9, 40(%rdx)
movq %r10, 16(%rdx)
movq %r11, 24(%rdx)
movq %rbx, (%rdx)
movq %rdi, 8(%rdx)
popq %rbx
retq
```
On an AVX512 target this could instead be a simple ZMM load + folded AND + store sequence (or a split XMM/YMM sequence on SSE/AVX1 targets).
We could attempt this in SLP/vectorcombine or leave it to the DAG and handle it prior to type legalisation.
For cases where some of the args are passed in registers, we'd need either to decide whether the BUILD_VECTOR is worth it or, if there's a shadow stack variable, to load from that instead:
```ll
define i512 @and_i512(i512 %a0, i512 %a1) {
%res = and i512 %a0, %a1
ret i512 %res
}
```
_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs