https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82325

            Bug ID: 82325
           Summary: worse code generated compared to clang when using a
                    constexpr array
           Product: gcc
           Version: 7.2.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c++
          Assignee: unassigned at gcc dot gnu.org
          Reporter: dvd at gnx dot it
  Target Milestone: ---

While testing some functions I'm writing for a deflate compressor, I noticed
that the following code is compiled differently by gcc 7.2 and clang 5.0:

        #include <array>

        // One lookup-table entry describing a range of values:
        // f() treats it as the half-open interval [base, base + (1 << bits)).
        // NOTE(review): uint16_t/uint8_t come from <cstdint>; the snippet as
        // posted only includes <array>.
        struct code_value
        {
                uint16_t base;  // first value covered by this entry
                uint8_t bits;   // the entry covers 1 << bits consecutive values
        };

        // Compile-time table of 29 {base, bits} entries, scanned linearly by f().
        // The values match the DEFLATE length-code table (base length, number of
        // extra bits) given in RFC 1951, section 3.2.5.
        constexpr std::array<code_value, 29> al = {{
                // bits == 0: each entry covers a single value
                {  3, 0}, {  4, 0}, {  5, 0}, {  6, 0},
                {  7, 0}, {  8, 0}, {  9, 0}, { 10, 0},
                // bits == 1
                { 11, 1}, { 13, 1}, { 15, 1}, { 17, 1},
                // bits == 2
                { 19, 2}, { 23, 2}, { 27, 2}, { 31, 2},
                // bits == 3
                { 35, 3}, { 43, 3}, { 51, 3}, { 59, 3},
                // bits == 4
                { 67, 4}, { 83, 4}, { 99, 4}, {115, 4},
                // bits == 5
                {131, 5}, {163, 5}, {195, 5}, {227, 5},
                // last entry: the single value 258
                {258, 0}
        }};


        // Linearly scans `al` and returns the first entry whose half-open range
        // [base, base + (1 << bits)) contains v.
        // NOTE(review): if no entry matches (v < 3 or v > 258 with the table
        // above), the loop ends with index == al.size() and the final al[index]
        // indexes one past the last element, which is undefined behaviour for
        // std::array::operator[]. Left as posted because the assembly listings
        // in this report were generated from exactly this code.
        code_value f(int v) {
                size_t index = 0;
                while (index < al.size()) {
                        // Lower bound (inclusive) of the current entry's range.
                        auto mi = al[index].base;
                        // Upper bound (exclusive); base is promoted, so mx is an int.
                        auto mx = al[index].base + (1 << al[index].bits);
                        if (mi <= v && v < mx)
                                break;
                        index++;
                }
                return al[index];
        }

On gcc (with -O3 and -funroll-loops) every iteration is (more or less):

        .L4:
          movzx ecx, BYTE PTR al[2+rax*4]
          movzx r9d, WORD PTR al[0+rax*4]
          mov r10d, esi
          sal r10d, cl
          add r10d, r9d
          cmp r10d, edi
          jle .L5
          cmp r9d, edi
          jle .L2

while on clang:

        .LBB0_4:
          cmp edi, 13
          jge .LBB0_6
          mov eax, 8
          mov eax, dword ptr [4*rax + al]
          ret

It looks like the latter is able to infer at compile time the values of
`al[index].base + (1 << al[index].bits)`.

godbolt link:
https://godbolt.org/#z:OYLghAFBqd5QCxAYwPYBMCmBRdBLAF1QCcAaPECAKxAEZSBnVAV2OUxAHIBSAJgGY8AO2QAbZlgDU3fgGEAhsWLyAnjOzcADAEEt2hgWLNkBSWiwB9AG7zxmPdwDsAIQeaAnM2EFaANgumAEbyDPb8rjpant4AHAGSgYQMMhG6jgAiKW7aaEIGmAAeAA7EkgboICCKympy5pjWtsyYpJK87uqSttL86dIuTqlRg5KS/K2aTumtI5IALBNTMy6jAKyLGcvOo74b0/3bko57W6MxJwej7heztJonbh0rtPSStEuXtONvHyO06z9Np9joDpo8/tc2r8VgJWrxodteCD4UCRvxXiiwZEPGiAfwEfNvvjUStVq9ifsRqtIRTSOCVr4QXMCTFvsySdt3JD2ZSXC8ATy6dins4vq9Vgi/N8JRzaO4ATLec5eEjWoqhbocS5eKtzpJJhkHIbDeFsnp6o07JIAGYQbySKwASgOjwYeAAXg1TMIsAUen1JqbhQB3BB4USYSR2oS%2BnqyLqiAB0bs9EEdzsG4I88mYREkAFs8P6E9xVs4fYVS%2BlE8FQllhVEc3n836ZH1bKXyzHK6tq7XI3xthBaHGZPGO2WKwUqzWko765qOh48Nao4XR70HdJeL4%2BL4t2OCwV5w2cR5AsRMPIANYL4bL7vT3iuZ93rWZYWXgisIQlyePqs7wyThHVIUQuFWThSCELhNCg1AuFkQdBzKFg2AHARaCgghYJA0DrxAfh%2BETIjSLI8jdnAzg5igmDODg0gEM4KCGBAe4cPokDSDgWAkDQfMinDTAyAoCB%2BMEiNiBAYBHF4UhrXDAhhNYiBAlw0hEiERQVC4LDSH4/NMCEAgAHkhFEHTONILB83kIRgAjdT8EvEw8CsTBWKswpMGQXMOE4PTvEwKiGMMPB81w0DRDwQJWMgUDUCKAg8FQPIuAAWnKNtkGQ59aHdSR0pM/hCutZghGIVBRFEdLRFQRKGBYtD2DoKKINo9SmIKGJfHS3w5kkYBkGQI5E14KNcEIEht3RVpZFQAShNKPh0UdbDIvwwjiPInbSMorgaOgzquBYtjSA4uDQJ4xAUAWiThPIShxKWlBRDs4BVk0e4FNEJTiBUtSrM07TdKggyjNM8zLIYmz3scqznJ85L3M8hjvN8pTQfIYzgvUsKIs4qKYritNGKSlK0s4TKCHQbLctFAqiv4JrWBa2g2s4SCjqsrqer6gaxHeyRVkTTRRYm/AiGWzC5rupaZt4Nbzo20gCKIkjdp2sCDo6nmTsYM6Lrw7XOF4XWGKY9bCdA9z/opkA5iAA%3D

Reply via email to