https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89810

            Bug ID: 89810
           Summary: Suboptimal codegen: integer load/assemble from
                    in-register array of uint8_t
           Product: gcc
           Version: 9.0
            Status: UNCONFIRMED
          Keywords: missed-optimization
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: nok.raven at gmail dot com
  Target Milestone: ---

Clang optimizes this perfectly, while GCC assembles the value byte by byte
(except in the 32-bit and 64-bit cases): https://godbolt.org/z/8b_D9A

#include <cstdint>
#include <array>

// suboptimal
// Combines the two bytes of `x` into one 16-bit value:
// x[0] becomes the low byte and x[1] the high byte.
std::uint16_t foo(std::array<std::uint8_t, 2> x)
{
    const auto low_byte = static_cast<std::uint16_t>(x[0]);
    const auto high_byte = static_cast<std::uint16_t>(x[1]);
    return low_byte | (high_byte << 8);
}

// suboptimal
// Combines the three bytes of `x` into the low 24 bits of a 32-bit value;
// x[0] is the least significant byte.
std::uint32_t foo(std::array<std::uint8_t, 3> x)
{
    const std::uint32_t b0 = x[0];
    const std::uint32_t b1 = x[1];
    const std::uint32_t b2 = x[2];
    return b0 | (b1 << 8) | (b2 << 16);
}

// good
// Combines the four bytes of `x` into a 32-bit value;
// x[0] is the least significant byte and x[3] the most significant.
std::uint32_t foo(std::array<std::uint8_t, 4> x)
{
    const std::uint32_t b0 = x[0];
    const std::uint32_t b1 = x[1];
    const std::uint32_t b2 = x[2];
    const std::uint32_t b3 = x[3];
    return b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
}

// suboptimal
// Combines the five bytes of `x` into the low 40 bits of a 64-bit value;
// x[0] is the least significant byte.
std::uint64_t foo(std::array<std::uint8_t, 5> x)
{
    const std::uint64_t b0 = x[0];
    const std::uint64_t b1 = x[1];
    const std::uint64_t b2 = x[2];
    const std::uint64_t b3 = x[3];
    const std::uint64_t b4 = x[4];
    return b0 | (b1 << 8) | (b2 << 16) | (b3 << 24) | (b4 << 32);
}

// suboptimal
// Combines the six bytes of `x` into the low 48 bits of a 64-bit value;
// x[0] is the least significant byte.
std::uint64_t foo(std::array<std::uint8_t, 6> x)
{
    const std::uint64_t b0 = x[0];
    const std::uint64_t b1 = x[1];
    const std::uint64_t b2 = x[2];
    const std::uint64_t b3 = x[3];
    const std::uint64_t b4 = x[4];
    const std::uint64_t b5 = x[5];
    return b0 | (b1 << 8) | (b2 << 16) | (b3 << 24) | (b4 << 32) | (b5 << 40);
}

// suboptimal
// Combines the seven bytes of `x` into the low 56 bits of a 64-bit value;
// x[0] is the least significant byte.
std::uint64_t foo(std::array<std::uint8_t, 7> x)
{
    const std::uint64_t b0 = x[0];
    const std::uint64_t b1 = x[1];
    const std::uint64_t b2 = x[2];
    const std::uint64_t b3 = x[3];
    const std::uint64_t b4 = x[4];
    const std::uint64_t b5 = x[5];
    const std::uint64_t b6 = x[6];
    return b0 | (b1 << 8) | (b2 << 16) | (b3 << 24)
        | (b4 << 32) | (b5 << 40) | (b6 << 48);
}

// good
// Combines the eight bytes of `x` into a 64-bit value;
// x[0] is the least significant byte and x[7] the most significant.
std::uint64_t foo(std::array<std::uint8_t, 8> x)
{
    const std::uint64_t b0 = x[0];
    const std::uint64_t b1 = x[1];
    const std::uint64_t b2 = x[2];
    const std::uint64_t b3 = x[3];
    const std::uint64_t b4 = x[4];
    const std::uint64_t b5 = x[5];
    const std::uint64_t b6 = x[6];
    const std::uint64_t b7 = x[7];
    return b0 | (b1 << 8) | (b2 << 16) | (b3 << 24)
        | (b4 << 32) | (b5 << 40) | (b6 << 48) | (b7 << 56);
}

Reply via email to