https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90888

            Bug ID: 90888
           Summary: std::swap bad code gen -- alias analysis insufficient
                    to remove dead store
           Product: gcc
           Version: 9.1.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: middle-end
          Assignee: unassigned at gcc dot gnu.org
          Reporter: david at doublewise dot net
  Target Milestone: ---

The following code optimizes well for `custom_swap` and `restrict_std_swap`,
but has an additional `mov` instruction for `std_swap`:


void custom_swap(int * lhs, int * rhs) {
    int temp = *lhs;
    *lhs = *rhs;
    *rhs = temp;
}

void restrict_std_swap(int * __restrict lhs, int * __restrict rhs) {
    int temp = *lhs;
    *lhs = 0;
    *lhs = *rhs;
    *rhs = temp;
}

void std_swap(int * lhs, int * rhs) {
    int temp = *lhs;
    *lhs = 0;
    *lhs = *rhs;
    *rhs = temp;
}



Compiles into this for x86-64:

custom_swap(int*, int*):
        mov     eax, DWORD PTR [rdi]
        mov     edx, DWORD PTR [rsi]
        mov     DWORD PTR [rdi], edx
        mov     DWORD PTR [rsi], eax
        ret
restrict_std_swap(int*, int*):
        mov     eax, DWORD PTR [rdi]
        mov     edx, DWORD PTR [rsi]
        mov     DWORD PTR [rsi], eax
        mov     DWORD PTR [rdi], edx
        ret
std_swap(int*, int*):
        mov     eax, DWORD PTR [rdi]
        mov     DWORD PTR [rdi], 0
        mov     edx, DWORD PTR [rsi]
        mov     DWORD PTR [rdi], edx
        mov     DWORD PTR [rsi], eax
        ret


And this for ARM64:

custom_swap(int*, int*):
        ldr     w3, [x1]
        ldr     w2, [x0]
        str     w3, [x0]
        str     w2, [x1]
        ret
restrict_std_swap(int*, int*):
        ldr     w2, [x0]
        ldr     w3, [x1]
        str     w3, [x0]
        str     w2, [x1]
        ret
std_swap(int*, int*):
        ldr     w2, [x0]
        str     wzr, [x0]
        ldr     w3, [x1]
        str     w3, [x0]
        str     w2, [x1]
        ret


As we see from the example that annotates the parameters with __restrict, the
problem appears to be that the risk of *lhs aliasing *rhs disables the
optimizer's ability to remove the dead store in the second line of std_swap. It
is able to see that if they don't alias, the store in line 2 is dead. It is not
able to see that if they do alias, the store in line 3 is dead and the store in
line 2 is dead.

See it live: https://godbolt.org/z/D3Ey9i . 




The real life problem here is that types that manage a resource but do not
implement a custom std::swap, as well as all types that recursively contain a
type that manages a resource, suffer from reduced performance for using
std::swap. The larger, slightly more meaningful test case showing how I arrived
at this reduction and its relationship to std::swap:

struct unique_ptr {
    unique_ptr():
        ptr(nullptr)
    {
    }
    unique_ptr(unique_ptr && other) noexcept:
        ptr(other.ptr)
    {
        other.ptr = nullptr;
    }
    unique_ptr & operator=(unique_ptr && other) noexcept {
        delete ptr;
        ptr = nullptr;
        ptr = other.ptr;
        other.ptr = nullptr;
        return *this;
    }
    ~unique_ptr() noexcept {
        delete ptr;
    }

    int * ptr;
};


void custom_swap(unique_ptr & lhs, unique_ptr & rhs) noexcept {
    int * temp = lhs.ptr;
    lhs.ptr = rhs.ptr;
    rhs.ptr = temp;
}

void inlined_std_swap(unique_ptr & lhs, unique_ptr & rhs) noexcept {
    int * temp_ptr = lhs.ptr;
    lhs.ptr = nullptr;

    delete lhs.ptr;
    lhs.ptr = nullptr;
    lhs.ptr = rhs.ptr;
    rhs.ptr = nullptr;

    delete rhs.ptr;
    rhs.ptr = nullptr;

    rhs.ptr = temp_ptr;
    temp_ptr = nullptr;

    delete temp_ptr;
}

void std_swap(unique_ptr & lhs, unique_ptr & rhs) noexcept {
    auto temp = static_cast<unique_ptr &&>(lhs);
    lhs = static_cast<unique_ptr &&>(rhs);
    rhs = static_cast<unique_ptr &&>(temp);
}

See it live: https://godbolt.org/z/yBFa4H

Reply via email to