efficient as direct comparisons of the underlying primitive type under -Os

msebor at gcc dot gnu.org Wed, 15 Jan 2020 02:35:18 -0800

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93265


Martin Sebor <msebor at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |msebor at gcc dot gnu.org
           See Also|                            |https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85330
             Blocks|                            |83819

--- Comment #2 from Martin Sebor <msebor at gcc dot gnu.org> ---
The strlen pass does have this optimization, but the pass only performs it at
-O2 and higher, not with -Os.  I don't know why the pass isn't run at every
optimization level.  Although some of its transformations might increase code
size, on balance I'd expect the pass to both decrease code size and produce
faster code.  For this example:

$ cat z.c && gcc -Os -S -Wall -fdump-tree-optimized=/dev/stdout -o/dev/stdout z.c
int f (int x)
{
  return __builtin_memcmp (&x, (int[]){ 42 }, sizeof x) == 0;
}

        .file   "z.c"
        .text

;; Function f (f, funcdef_no=0, decl_uid=1930, cgraph_uid=1, symbol_order=0)

f (int x)
{
  int D.1932[1];
  int _1;
  _Bool _2;
  int _5;

  <bb 2> [local count: 1073741824]:
  D.1932[0] = 42;
  _1 = __builtin_memcmp (&x, &D.1932, 4);
  _2 = _1 == 0;
  _5 = (int) _2;
  D.1932 ={v} {CLOBBER};
  return _5;

}


        .globl  f
        .type   f, @function
f:
.LFB0:
        .cfi_startproc
        subq    $40, %rsp
        .cfi_def_cfa_offset 48
        movl    $4, %edx
        movl    %edi, 12(%rsp)
        leaq    28(%rsp), %rsi
        leaq    12(%rsp), %rdi
        movl    $42, 28(%rsp)
        call    memcmp
        testl   %eax, %eax
        sete    %al
        addq    $40, %rsp
        .cfi_def_cfa_offset 8
        movzbl  %al, %eax
        ret
        .cfi_endproc
.LFE0:
        .size   f, .-f
        .ident  "GCC: (GNU) 10.0.0 20200115 (experimental)"
        .section        .note.GNU-stack,"",@progbits

Whereas at -O2 the object code is much smaller:

        .p2align 4
        .globl  f
        .type   f, @function
f:
.LFB0:
        .cfi_startproc
        xorl    %eax, %eax
        cmpl    $42, %edi
        sete    %al
        ret
        .cfi_endproc
.LFE0:
        .size   f, .-f
        .ident  "GCC: (GNU) 10.0.0 20200115 (experimental)"
        .section        .note.GNU-stack,"",@progbits

See also PR 85330.  I can look into enabling the strlen pass at all
optimization levels for GCC 11.


Referenced Bugs:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=83819
[Bug 83819] [meta-bug] missing strlen optimizations

[Bug tree-optimization/93265] memcmp comparisons of structs wrapping a primitive type not as compact/efficient as direct comparisons of the underlying primitive type under -Os

Reply via email to