https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69871

            Bug ID: 69871
           Summary: Type punned structs returned by value optimized poorly
           Product: gcc
           Version: 5.3.0
            Status: UNCONFIRMED
          Severity: enhancement
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: rf at rufflewind dot com
  Target Milestone: ---

The following code, which unpacks a 32-bit integer into a struct of four bytes,
does not optimize as well as it should.  While "unpack" seems to optimize just
fine, trivial wrappers of the function do not seem to get optimized nearly as
well:

- Two of the wrappers ("wrapper", "wrapper2") are completely identical yet they
do not result in the same assembly code.  One is optimized well, the other is
not.
- Adding another layer of indirection ("wrapperwrapper") also prevents the
optimization from occurring.

The problem occurs not only for union-based type-punning, but also for similar
tricks that involve:

  - memcpy, where all three wrappers would optimize poorly, or
  - bitshift operators, where even "unpack" would optimize poorly.

See also: https://gcc.gnu.org/ml/gcc/2016-02/msg00244.html

The code was compiled with "gcc -fverbose-asm -Wall -S -O3 foo.c" on Linux
4.4.1 x86-64.  The GCC binaries are part of Arch Linux's gcc-multilib
5.3.0-4 binary package.

---

/* Four single-byte fields.  unpack() overlays this struct on an unsigned
   through a union, so the four chars view the bytes of that integer. */
struct alpha {
    char a, b, c, d;
};

/* Reinterpret the bytes of x as a struct alpha via union type punning:
   store through the unsigned member, read back through the struct member.
   This is the one function the report says optimizes well (a single
   register move in the assembly listing further down). */
struct alpha unpack(unsigned x)
{
    /* Both members share the same storage. */
    union {
        struct alpha r;
        unsigned i;
    } u;
    u.i = x;        /* write as integer ...          */
    return u.r;     /* ... read back as four chars   */
}

/* Trivial forwarding wrapper around unpack().  Despite doing nothing else,
   the listing below shows it compiled to a byte-by-byte reassembly of y
   instead of a single move -- this is the poor optimization being reported. */
struct alpha wrapper(unsigned y)
{
    return unpack(y);
}

/* Textually identical to wrapper() apart from its name, yet in the listing
   below it compiles to a single move.  Kept as a separate function to show
   that two identical bodies produce different code. */
struct alpha wrapper2(unsigned y)
{
    return unpack(y);
}

/* One more layer of forwarding: calls wrapper(), which calls unpack().
   The listing below shows the same poorly optimized sequence as wrapper(). */
struct alpha wrapperwrapper(unsigned y)
{
    return wrapper(y);
}

---

        .file   "foo.c"
# GNU C11 (GCC) version 5.3.0 (x86_64-unknown-linux-gnu)
#       compiled by GNU C version 5.3.0, GMP version 6.1.0, MPFR version
#       3.1.3-p5, MPC version 1.0.3
# GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
# options passed:  foo.c -mtune=generic -march=x86-64 -O3 -Wall
# -fverbose-asm
# options enabled:  -faggressive-loop-optimizations -falign-labels
# -fasynchronous-unwind-tables -fauto-inc-dec -fbranch-count-reg
# -fcaller-saves -fchkp-check-incomplete-type -fchkp-check-read
# -fchkp-check-write -fchkp-instrument-calls -fchkp-narrow-bounds
# -fchkp-optimize -fchkp-store-bounds -fchkp-use-static-bounds
# -fchkp-use-static-const-bounds -fchkp-use-wrappers
# -fcombine-stack-adjustments -fcommon -fcompare-elim -fcprop-registers
# -fcrossjumping -fcse-follow-jumps -fdefer-pop
# -fdelete-null-pointer-checks -fdevirtualize -fdevirtualize-speculatively
# -fdwarf2-cfi-asm -fearly-inlining -feliminate-unused-debug-types
# -fexpensive-optimizations -fforward-propagate -ffunction-cse -fgcse
# -fgcse-after-reload -fgcse-lm -fgnu-runtime -fgnu-unique
# -fguess-branch-probability -fhoist-adjacent-loads -fident -fif-conversion
# -fif-conversion2 -findirect-inlining -finline -finline-atomics
# -finline-functions -finline-functions-called-once
# -finline-small-functions -fipa-cp -fipa-cp-alignment -fipa-cp-clone
# -fipa-icf -fipa-icf-functions -fipa-icf-variables -fipa-profile
# -fipa-pure-const -fipa-ra -fipa-reference -fipa-sra -fira-hoist-pressure
# -fira-share-save-slots -fira-share-spill-slots
# -fisolate-erroneous-paths-dereference -fivopts -fkeep-static-consts
# -fleading-underscore -flifetime-dse -flra-remat -flto-odr-type-merging
# -fmath-errno -fmerge-constants -fmerge-debug-strings
# -fmove-loop-invariants -fomit-frame-pointer -foptimize-sibling-calls
# -foptimize-strlen -fpartial-inlining -fpeephole -fpeephole2
# -fpredictive-commoning -fprefetch-loop-arrays -free -freg-struct-return
# -freorder-blocks -freorder-blocks-and-partition -freorder-functions
# -frerun-cse-after-loop -fsched-critical-path-heuristic
# -fsched-dep-count-heuristic -fsched-group-heuristic -fsched-interblock
# -fsched-last-insn-heuristic -fsched-rank-heuristic -fsched-spec
# -fsched-spec-insn-heuristic -fsched-stalled-insns-dep -fschedule-fusion
# -fschedule-insns2 -fsemantic-interposition -fshow-column -fshrink-wrap
# -fsigned-zeros -fsplit-ivs-in-unroller -fsplit-wide-types -fssa-phiopt
# -fstdarg-opt -fstrict-aliasing -fstrict-overflow
# -fstrict-volatile-bitfields -fsync-libcalls -fthread-jumps
# -ftoplevel-reorder -ftrapping-math -ftree-bit-ccp -ftree-builtin-call-dce
# -ftree-ccp -ftree-ch -ftree-coalesce-vars -ftree-copy-prop
# -ftree-copyrename -ftree-cselim -ftree-dce -ftree-dominator-opts
# -ftree-dse -ftree-forwprop -ftree-fre -ftree-loop-distribute-patterns
# -ftree-loop-if-convert -ftree-loop-im -ftree-loop-ivcanon
# -ftree-loop-optimize -ftree-loop-vectorize -ftree-parallelize-loops=
# -ftree-partial-pre -ftree-phiprop -ftree-pre -ftree-pta -ftree-reassoc
# -ftree-scev-cprop -ftree-sink -ftree-slp-vectorize -ftree-slsr -ftree-sra
# -ftree-switch-conversion -ftree-tail-merge -ftree-ter -ftree-vrp
# -funit-at-a-time -funswitch-loops -funwind-tables -fverbose-asm
# -fzero-initialized-in-bss -m128bit-long-double -m64 -m80387
# -malign-stringops -mavx256-split-unaligned-load
# -mavx256-split-unaligned-store -mfancy-math-387 -mfp-ret-in-387 -mfxsr
# -mglibc -mieee-fp -mlong-double-80 -mmmx -mno-sse4 -mpush-args -mred-zone
# -msse -msse2 -mtls-direct-seg-refs -mvzeroupper

        .section        .text.unlikely,"ax",@progbits
.LCOLDB0:
        .text
.LHOTB0:
        .p2align 4,,15
        .globl  unpack
        .type   unpack, @function
#-----------------------------------------------------------------------
# struct alpha unpack(unsigned x)
# In:   %edi = x
# Out:  %eax = the four bytes of the returned struct alpha
# The whole union store/load collapses to one register copy; this is the
# well-optimized baseline the other functions are compared against.
#-----------------------------------------------------------------------
unpack:
.LFB0:
        .cfi_startproc
        movl    %edi, %eax      # x, x
        ret
        .cfi_endproc
.LFE0:
        .size   unpack, .-unpack
        .section        .text.unlikely
.LCOLDE0:
        .text
.LHOTE0:
        .section        .text.unlikely
.LCOLDB1:
        .text
.LHOTB1:
        .p2align 4,,15
        .globl  wrapper
        .type   wrapper, @function
#-----------------------------------------------------------------------
# struct alpha wrapper(unsigned y)
# In:   %edi = y
# Out:  %eax = the four bytes of the returned struct alpha
# Uses: %edx as scratch; %edi is overwritten
# No call remains; the body takes y apart byte by byte and ORs the pieces
# back together.  The net result is %eax = y, i.e. the same value that
# unpack produces with a single movl.
#-----------------------------------------------------------------------
wrapper:
.LFB5:
        .cfi_startproc
        movl    %edi, %eax      # y, y
        xorl    %edx, %edx      # retval.9
# Bytes 0 and 1 of y are placed into %dl and %dh.  The sign extension of
# %ah is irrelevant: only %al is read before %eax is reloaded below.
        movsbl  %ah, %eax       # y, SR.14
        movb    %dil, %dl       # y, retval.9
        movb    %al, %dh        # SR.14, retval.9
        movl    %edi, %eax      # y, tmp101
# Mask -16777216 is 0xFF000000 (byte 3); mask 16711680 is 0x00FF0000 (byte 2).
        andl    $-16777216, %edi        #, tmp105
        andl    $16711680, %eax #, tmp101
# Keep only the low 16 bits of %edx (bytes 0-1), then merge in bytes 2 and 3.
        movzwl  %dx, %edx       # retval.9, tmp103
        orl     %eax, %edx      # tmp101, tmp106
        movl    %edx, %eax      # tmp106, tmp107
        orl     %edi, %eax      # tmp105, tmp107
        ret
        .cfi_endproc
.LFE5:
        .size   wrapper, .-wrapper
        .section        .text.unlikely
.LCOLDE1:
        .text
.LHOTE1:
        .section        .text.unlikely
.LCOLDB2:
        .text
.LHOTB2:
        .p2align 4,,15
        .globl  wrapper2
        .type   wrapper2, @function
#-----------------------------------------------------------------------
# struct alpha wrapper2(unsigned y)
# In:   %edi = y
# Out:  %eax = the four bytes of the returned struct alpha
# Same C body as wrapper, but here the result is the single register copy
# also seen in unpack.
#-----------------------------------------------------------------------
wrapper2:
.LFB2:
        .cfi_startproc
        movl    %edi, %eax      # y, y
        ret
        .cfi_endproc
.LFE2:
        .size   wrapper2, .-wrapper2
        .section        .text.unlikely
.LCOLDE2:
        .text
.LHOTE2:
        .section        .text.unlikely
.LCOLDB3:
        .text
.LHOTB3:
        .p2align 4,,15
        .globl  wrapperwrapper
        .type   wrapperwrapper, @function
#-----------------------------------------------------------------------
# struct alpha wrapperwrapper(unsigned y)
# In:   %edi = y
# Out:  %eax = the four bytes of the returned struct alpha
# Uses: %edx as scratch; %edi is overwritten
# Instruction-for-instruction the same sequence as wrapper (only the
# compiler temporaries named in the trailing comments differ): y is split
# into bytes and reassembled, so the net result is again %eax = y.
#-----------------------------------------------------------------------
wrapperwrapper:
.LFB3:
        .cfi_startproc
        movl    %edi, %eax      # y, y
        xorl    %edx, %edx      # D.1859
# Bytes 0 and 1 of y go into %dl and %dh; only %al of the sign-extended
# value is read before %eax is reloaded.
        movsbl  %ah, %eax       # y, SR.5
        movb    %dil, %dl       # y, D.1859
        movb    %al, %dh        # SR.5, D.1859
        movl    %edi, %eax      # y, tmp101
# Mask -16777216 is 0xFF000000 (byte 3); mask 16711680 is 0x00FF0000 (byte 2).
        andl    $-16777216, %edi        #, tmp105
        andl    $16711680, %eax #, tmp101
# Keep only the low 16 bits of %edx (bytes 0-1), then merge in bytes 2 and 3.
        movzwl  %dx, %edx       # D.1859, tmp103
        orl     %eax, %edx      # tmp101, tmp106
        movl    %edx, %eax      # tmp106, tmp107
        orl     %edi, %eax      # tmp105, tmp107
        ret
        .cfi_endproc
.LFE3:
        .size   wrapperwrapper, .-wrapperwrapper
        .section        .text.unlikely
.LCOLDE3:
        .text
.LHOTE3:
        .ident  "GCC: (GNU) 5.3.0"
        .section        .note.GNU-stack,"",@progbits

Reply via email to