https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112835

Florian Weimer <fw at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |fw at gcc dot gnu.org

--- Comment #3 from Florian Weimer <fw at gcc dot gnu.org> ---
There may be a related issue on x86-64:

#include <string.h>

int
f (const char *a, const char *b)
{
    return memcmp (a, b, 64) == 0;
}

With -O2 -march=x86-64-v3, GCC produces:

f:
        vmovdqu (%rdi), %ymm0
        vpxor   (%rsi), %ymm0, %ymm0
        vptest  %ymm0, %ymm0
        jne     .L2
        vmovdqu 32(%rdi), %ymm0
        vpxor   32(%rsi), %ymm0, %ymm0
        vptest  %ymm0, %ymm0
        je      .L5
.L2:
        movl    $1, %eax
        xorl    $1, %eax
        vzeroupper
        ret
        .p2align 4,,10
        .p2align 3
.L5:
        xorl    %eax, %eax
        xorl    $1, %eax
        vzeroupper
        ret

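The equivalent 32-byte comparison (memcmp (a, b, 32) == 0) produces a
branchless sequence, so for the 64-byte case I would expect something like
this, with a single early-exit branch and one sete for the result: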

f:
        xorl    %eax, %eax
        vmovdqu (%rdi), %ymm0
        vpxor   (%rsi), %ymm0, %ymm0
        vptest  %ymm0, %ymm0
        jne 1f
        vmovdqu 32(%rdi), %ymm0
        vpxor   32(%rsi), %ymm0, %ymm0
        vptest  %ymm0, %ymm0
        sete    %al
1:
        vzeroupper
        ret

Reply via email to