https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113560

            Bug ID: 113560
           Summary: Strange code generated when optimizing a
                    multiplication on x86_64
           Product: gcc
           Version: 13.2.1
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c++
          Assignee: unassigned at gcc dot gnu.org
          Reporter: accelerator0099 at gmail dot com
  Target Milestone: ---

Code:
#include <immintrin.h>
// Reproducer, two-multiply case: chains two 64x64 -> 128-bit unsigned
// multiplies through _mulx_u64 and keeps only the high half of the second
// product.
//
// buf: destination buffer; two bytes are written at buf + 2.
// in:  first operand of the first multiply.
// Returns buf + 10.
auto f(char *buf, unsigned long long in) noexcept
{
    unsigned long long hi{};
    // lo receives the low 64 bits of in * 0x2af31dc462 (184467440738 decimal).
    // The high half stored into hi here is overwritten by the next call
    // before anything reads it.
    auto lo{_mulx_u64(in, 0x2af31dc462ull, &hi)};
    // hi receives the high 64 bits of lo * 100; the low half assigned back to
    // lo is never read afterwards.
    lo = _mulx_u64(lo, 100, &hi);
    // Copy the first two bytes of hi's object representation to buf + 2
    // (the low 16 bits on little-endian x86_64).
    __builtin_memcpy(buf + 2, &hi, 2);
    return buf + 10;
}
// Reproducer, single-multiply case: one 64x64 -> 128-bit unsigned multiply
// through _mulx_u64, keeping only the high half of the product.
//
// buf: destination buffer; two bytes are written at buf + 2.
// in:  value multiplied by 100.
// Returns buf + 10.
auto g(char *buf, unsigned long long in) noexcept
{
    unsigned long long hi{};
    // Only the high 64 bits (stored through &hi) are used; the returned low
    // half is discarded.
    _mulx_u64(in, 100, &hi);
    // Copy the first two bytes of hi's object representation to buf + 2
    // (the low 16 bits on little-endian x86_64).
    __builtin_memcpy(buf + 2, &hi, 2);
    return buf + 10;
}

Compile with:
-Ofast -std=c++23 -march=znver4

GCC 13.2 and trunk generate:
f(char*, unsigned long long):
        movabs  rdx, 184467440738
        mov     rax, rdi
        mulx    r9, r8, rsi
        xor     r9d, r9d
        mov     rsi, r8
        mov     rdi, r9
        add     rsi, r8
        shld    rdi, r8, 1
        add     rsi, r8
        adc     rdi, r9
        shld    rdi, rsi, 3
        sal     rsi, 3
        add     rsi, r8
        adc     rdi, r9
        add     rax, 10
        shld    rdi, rsi, 2
        mov     WORD PTR [rax-8], di
        ret
g(char*, unsigned long long):
        mov     eax, 100
        mul     rsi
        lea     rax, [rdi+10]
        mov     WORD PTR [rdi+2], dx
        ret

GCC 12 generates:
f(char*, unsigned long long):
        movabs  rdx, 184467440738
        mov     rax, rsi
        imul    rax, rdx
        mov     edx, 100
        mulx    rdx, rax, rax
        lea     rax, [rdi+10]
        mov     WORD PTR [rdi+2], dx
        ret
g(char*, unsigned long long):
        mov     eax, 100
        mul     rsi
        lea     rax, [rdi+10]
        mov     WORD PTR [rdi+2], dx
        ret

Clang:
f(char*, unsigned long long):                  # @f(char*, unsigned long long)
        movabs  rdx, 184467440738
        mov     eax, 100
        imul    rdx, rsi
        mulx    rax, rax, rax
        mov     word ptr [rdi + 2], ax
        lea     rax, [rdi + 10]
        ret
g(char*, unsigned long long):                  # @g(char*, unsigned long long)
        mov     eax, 100
        mov     rdx, rsi
        mulx    rax, rax, rax
        mov     word ptr [rdi + 2], ax
        lea     rax, [rdi + 10]
        ret

See also:
https://gcc.godbolt.org/z/df7Gr1MKo

Reply via email to