https://gcc.gnu.org/bugzilla/show_bug.cgi?id=125120
Bug ID: 125120
Summary: Missed auto-vectorization/memcpy replacement in
__builtin_unreachable() context
Product: gcc
Version: 15.2.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: tree-optimization
Assignee: unassigned at gcc dot gnu.org
Reporter: nicula at nicula dot xyz
Target Milestone: ---
Here's some code that tries to emulate a `push_back_unchecked()` function by
using a `__builtin_unreachable()` guard on the condition used by the *checked*
`push_back()` function.
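The compiler recognizes the loop in `v1` as a `memcpy()`. For `v2`, the
`abort()` branch is correctly omitted from the assembly, but the final codegen
is not what I expected: the loop is neither recognized as a `memcpy()` nor
auto-vectorized. Instead it stays a scalar loop that copies one element per
iteration and stores the updated `len` back to memory every time.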
Code:
#include <cstdlib>
// Minimal vector-like aggregate used by the reproducer: a raw element pointer
// plus an element count and a capacity, all value-initialized (null / 0).
// Nothing in this struct allocates or frees `data`.
template<typename T>
struct Vec {
T *data{};
size_t len{};
size_t capacity{};
// Returns true when the element count has reached the capacity.
bool full() const {
return len == capacity;
}
// Checked append: calls abort() when the vector is full, otherwise copies
// `value` into data[len] and increments len.
void push_back(const T &value) {
if (full()) {
abort();
}
data[len] = value;
++len;
}
// Unchecked append: same store and increment as push_back(), but with no
// capacity test at all.
void push_back_unchecked(const T &value) {
data[len] = value;
++len;
}
};
// Baseline case: appends `len` ints from `src` to `dst` through the unchecked
// push_back_unchecked(). Both parameters are __restrict-qualified. Note that
// the `len` parameter is the number of elements to copy, not Vec::len.
// Per the assembly listing in this report, GCC compiles this loop to a single
// memcpy() call followed by one update of dst.len.
void v1(Vec<int> &__restrict dst, const int *__restrict src, size_t len) {
for (size_t i = 0; i != len; ++i) {
dst.push_back_unchecked(src[i]);
}
}
// Case under report: same copy loop as v1, but it goes through the *checked*
// push_back() and precedes each call with a __builtin_unreachable() guard on
// the very condition push_back() tests. The guard is meant to let the
// compiler drop the abort() path, making this loop equivalent to v1.
// Per the assembly listing in this report, the abort() call is indeed gone,
// but the loop is left as a scalar element-by-element copy.
void v2(Vec<int> &__restrict dst, const int *__restrict src, size_t len) {
for (size_t i = 0; i != len; ++i) {
// Promise to the optimizer that dst is never full at this point.
if (dst.full()) {
__builtin_unreachable();
}
dst.push_back(src[i]);
}
}
Assembly:
v1(Vec<int>&, int const*, unsigned long):
test rdx, rdx
je .L8
push r12
push rbp
push rbx
mov r12, QWORD PTR [rdi+8]
mov rax, QWORD PTR [rdi]
mov rbx, rdx
mov rbp, rdi
lea rdx, [0+rdx*4]
lea rdi, [rax+r12*4]
add r12, rbx
call memcpy
mov QWORD PTR [rbp+8], r12
pop rbx
pop rbp
pop r12
ret
.L8:
ret
v2(Vec<int>&, int const*, unsigned long):
test rdx, rdx
je .L17
mov rax, QWORD PTR [rdi+8]
mov rcx, rax
add rdx, rax
neg rcx
lea r8, [rsi+rcx*4]
mov rsi, QWORD PTR [rdi]
.L14:
mov ecx, DWORD PTR [r8+rax*4]
mov DWORD PTR [rsi+rax*4], ecx
inc rax
mov QWORD PTR [rdi+8], rax
cmp rdx, rax
jne .L14
.L17:
ret