https://gcc.gnu.org/bugzilla/show_bug.cgi?id=125865

            Bug ID: 125865
           Summary: Unnecessary branches when expanding memset
           Product: gcc
           Version: 16.1.1
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: hjl.tools at gmail dot com
  Target Milestone: ---
            Target: x86-64

[hjl@gnu-zen4-1 pr125856]$ cat z.c
void
bounded_copy (char *dst, int c, __SIZE_TYPE__ n)
{
  if (n <= 15)
    __builtin_memset(dst, c, n);
}
[hjl@gnu-zen4-1 pr125856]$ gcc -S -O2 z.c
[hjl@gnu-zen4-1 pr125856]$ cat z.s
        .file   "z.c"
        .text
        .p2align 4
        .globl  bounded_copy
        .type   bounded_copy, @function
bounded_copy:
.LFB0:
        .cfi_startproc
        cmpq    $15, %rdx
        jbe     .L17
.L1:
        ret
        .p2align 4,,10
        .p2align 3
.L17:
        movabsq $72340172838076673, %rax
        movzbl  %sil, %esi
        imulq   %rax, %rsi
        movq    %rsi, %xmm0
        punpcklqdq      %xmm0, %xmm0
        cmpl    $64, %edx
        jnb     .L3 <<<<<<<< Always false: n <= 15 here, so n >= 64 is impossible
        testb   $32, %dl
        jne     .L18 <<<<<<<< Always false: n <= 15 here, so bit 5 (32) is never set
        testb   $16, %dl
        jne     .L19 <<<<<<<< Always false: n <= 15 here, so bit 4 (16) is never set
        testb   $8, %dl
        jne     .L20
        testb   $4, %dl
        jne     .L21
        testl   %edx, %edx
        je      .L1
        movb    %sil, (%rdi)
        testb   $2, %dl
        je      .L1
        movl    %edx, %edx
        movw    %si, -2(%rdi,%rdx)
        ret
        .p2align 4,,10
        .p2align 3
.L3:
        movl    %edx, %eax
        subl    $1, %edx
        movups  %xmm0, -64(%rdi,%rax)
        movups  %xmm0, -48(%rdi,%rax)
        movups  %xmm0, -32(%rdi,%rax)
        movups  %xmm0, -16(%rdi,%rax)
        cmpl    $64, %edx
        jb      .L1
        andl    $-64, %edx
        xorl    %eax, %eax
.L10:
        movl    %eax, %ecx
        addl    $64, %eax
        movups  %xmm0, (%rdi,%rcx)
        movups  %xmm0, 16(%rdi,%rcx)
        movups  %xmm0, 32(%rdi,%rcx)
        movups  %xmm0, 48(%rdi,%rcx)
        cmpl    %edx, %eax
        jb      .L10
        ret
        .p2align 4,,10
        .p2align 3
.L18:
        movl    %edx, %edx
        movups  %xmm0, (%rdi)
        movups  %xmm0, 16(%rdi)
        movups  %xmm0, -32(%rdi,%rdx)
        movups  %xmm0, -16(%rdi,%rdx)
        ret
        .p2align 4,,10
        .p2align 3
.L19:
        movl    %edx, %edx
        movups  %xmm0, (%rdi)
        movups  %xmm0, -16(%rdi,%rdx)
        ret
        .p2align 4,,10
        .p2align 3
.L20:
        movl    %edx, %edx
        movq    %rsi, (%rdi)
        movq    %rsi, -8(%rdi,%rdx)
        ret
        .p2align 4,,10
        .p2align 3
.L21:
        movl    %edx, %edx
        movl    %esi, (%rdi)
        movl    %esi, -4(%rdi,%rdx)
        ret
        .cfi_endproc
.LFE0:
        .size   bounded_copy, .-bounded_copy
        .ident  "GCC: (GNU) 16.1.1 20260515 (Red Hat 16.1.1-2)"
        .section        .note.GNU-stack,"",@progbits
[hjl@gnu-zen4-1 pr125856]$

Reply via email to