https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98218

            Bug ID: 98218
           Summary: [TARGET_MMX_WITH_SSE] Missing vec_cmpmn/vcondmn expanders
                    for 64-bit vectors
           Product: gcc
           Version: 11.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: crazylht at gmail dot com
  Target Milestone: ---
              Host: x86_64-pc-linux-gnu
            Target: x86_64-*-* i?86-*-*

Refer to https://godbolt.org/z/sYE88f

cat test.c

typedef char v8qi __attribute__ ((vector_size(8)));
v8qi f1(v8qi a, v8qi b) {
  return a == b;
}

gcc -O2 -msse4.1 -S

f1(char __vector(8), char __vector(8)):
        pextrb  edx, xmm0, 0
        pextrb  eax, xmm1, 0
        pextrb  ecx, xmm0, 1
        cmp     dl, al
        pextrb  eax, xmm1, 1
        pextrb  esi, xmm0, 2
        setne   dl
        pextrb  edi, xmm0, 3
        pextrb  r8d, xmm0, 4
        sub     edx, 1
        cmp     cl, al
        pextrb  eax, xmm1, 2
        setne   cl
        pextrb  r9d, xmm0, 5
        movzx   edx, dl
        sub     ecx, 1
        cmp     sil, al
        pextrb  eax, xmm1, 3
        setne   sil
        pextrb  r10d, xmm0, 6
        pextrb  r11d, xmm0, 7
        movzx   ecx, cl
        sub     esi, 1
        cmp     dil, al
        pextrb  eax, xmm1, 4
        setne   dil
        movzx   esi, sil
        sub     edi, 1
        cmp     r8b, al
        pextrb  eax, xmm1, 5
        setne   r8b
        movzx   edi, dil
        sub     r8d, 1
        cmp     r9b, al
        pextrb  eax, xmm1, 6
        setne   r9b
        movzx   r8d, r8b
        sub     r9d, 1
        cmp     r10b, al
        pextrb  eax, xmm1, 7
        setne   r10b
        movzx   r9d, r9b
        sub     r10d, 1
        cmp     r11b, al
        setne   al
        movzx   r10d, r10b
        sub     eax, 1
        movzx   eax, al
        sal     rax, 8
        or      rax, r10
        sal     rax, 8
        or      rax, r9
        sal     rax, 8
        or      rax, r8
        sal     rax, 8
        or      rax, rdi
        sal     rax, 8
        or      rax, rsi
        sal     rax, 8
        or      rax, rcx
        sal     rax, 8
        or      rax, rdx
        movq    xmm0, rax
        ret

The comparison should instead be done with a single vector compare instruction:

f1(char __vector(8), char __vector(8)):                           # @f1(char __vector(8), char __vector(8))
        pcmpeqb xmm0, xmm1
        ret

Reply via email to