https://gcc.gnu.org/bugzilla/show_bug.cgi?id=125120

            Bug ID: 125120
           Summary: Missed auto-vectorization/memcpy replacement in
                    __builtin_unreachable() context
           Product: gcc
           Version: 15.2.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: nicula at nicula dot xyz
  Target Milestone: ---

Here's some code that tries to emulate a `push_back_unchecked()` function by
using a `__builtin_unreachable()` guard on the condition used by the *checked*
`push_back()` function.

The compiler recognizes the loop in `v1` as a `memcpy()`. In `v2`, the
`abort()` branch is correctly omitted from the assembly, but the final codegen
is not what I expected: I expected either `memcpy()` pattern recognition or
successful auto-vectorization, and instead got a scalar loop that stores `len`
back to memory on every iteration.

Code:

    #include <cstdlib>

    // Minimal vector-like aggregate used by the reproducer: a raw element
    // pointer plus a length and a capacity. Nothing in this struct allocates
    // or frees storage; all three members are value-initialized by default.
    template<typename T>
    struct Vec {
        // Buffer that elements are written into.
        T *data{};
        // Index of the next slot to write; incremented by every push.
        size_t len{};
        // Value `len` is compared against by full().
        size_t capacity{};

        // Returns true when `len` equals `capacity`.
        bool full() const {
            return len == capacity;
        }

        // Checked append: calls abort() when full(), otherwise stores `value`
        // at data[len] and increments `len`.
        void push_back(const T &value) {
            if (full()) {
                abort();
            }
            data[len] = value;
            ++len;
        }

        // Unchecked append: stores `value` at data[len] and increments `len`
        // without consulting full().
        void push_back_unchecked(const T &value) {
            data[len] = value;
            ++len;
        }
    };

    // Baseline case: appends src[0..len) to `dst` one element at a time using
    // the unchecked push. Both parameters are marked __restrict. Per the
    // assembly in this report, GCC compiles this loop to a single memcpy()
    // call followed by one update of dst.len.
    void v1(Vec<int> &__restrict dst, const int *__restrict src, size_t len) {
        for (size_t i = 0; i != len; ++i) {
            dst.push_back_unchecked(src[i]);
        }
    }

    // Case under report: same element-by-element append as v1, but through the
    // checked push_back(). Each iteration first marks the dst.full() case with
    // __builtin_unreachable(), which is the same condition push_back() tests
    // before calling abort(). Per the assembly in this report, the abort()
    // branch is removed but the loop stays scalar and writes dst.len on every
    // iteration.
    void v2(Vec<int> &__restrict dst, const int *__restrict src, size_t len) {
        for (size_t i = 0; i != len; ++i) {
            // Tells the optimizer that `dst` is never full at this point.
            if (dst.full()) {
                __builtin_unreachable();
            }
            dst.push_back(src[i]);
        }
    }

Assembly:

    v1(Vec<int>&, int const*, unsigned long):
            test    rdx, rdx
            je      .L8
            push    r12
            push    rbp
            push    rbx
            mov     r12, QWORD PTR [rdi+8]
            mov     rax, QWORD PTR [rdi]
            mov     rbx, rdx
            mov     rbp, rdi
            lea     rdx, [0+rdx*4]
            lea     rdi, [rax+r12*4]
            add     r12, rbx
            call    memcpy
            mov     QWORD PTR [rbp+8], r12
            pop     rbx
            pop     rbp
            pop     r12
            ret
    .L8:
            ret

    v2(Vec<int>&, int const*, unsigned long):
            test    rdx, rdx
            je      .L17
            mov     rax, QWORD PTR [rdi+8]
            mov     rcx, rax
            add     rdx, rax
            neg     rcx
            lea     r8, [rsi+rcx*4]
            mov     rsi, QWORD PTR [rdi]
    .L14:
            mov     ecx, DWORD PTR [r8+rax*4]
            mov     DWORD PTR [rsi+rax*4], ecx
            inc     rax
            mov     QWORD PTR [rdi+8], rax
            cmp     rdx, rax
            jne     .L14
    .L17:
            ret

Reply via email to