Bug ID: 69871
           Summary: Type punned structs returned by value optimized poorly
           Product: gcc
           Version: 5.3.0
            Status: UNCONFIRMED
          Severity: enhancement
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: rf at rufflewind dot com
  Target Milestone: ---

The following code, which unpacks a 32-bit integer into a struct of four bytes,
does not optimize as well as it should.  While "unpack" seems to optimize just
fine, trivial wrappers of the function do not seem to get optimized nearly as
well:

- Two of the wrappers ("wrapper", "wrapper2") are completely identical yet they
do not result in the same assembly code.  One is optimized well, the other is
not.

- Adding another layer of indirection ("wrapperwrapper") also prevents the
optimization from occurring.

The problem occurs not only for union-based type-punning, but also for similar
tricks that involve:

  - memcpy, where all three wrappers would optimize poorly, or
  - bitshift operators, where even "unpack" would optimize poorly.

See also: [the referenced link is missing from this copy of the report]

The code was compiled with "gcc -fverbose-asm -Wall -S -O3 foo.c" on Linux
4.4.1 x86-64.  The GCC binaries are part of Arch Linux's gcc-multilib
5.3.0-4 binary package.


/* Four single-byte fields; unpack() overlays them on one `unsigned`. */
struct alpha {
    char a, b, c, d;
};

/*
 * Reinterpret the object representation of x as a struct alpha by storing
 * to one union member and reading back the other (union-based type punning).
 * Which field receives which byte of x follows the target's byte order.
 */
struct alpha unpack(unsigned x)
{
    union {
        struct alpha r;
        unsigned i;
    } u;
    u.i = x;
    return u.r;
}

/* Trivial forwarder to unpack(). */
struct alpha wrapper(unsigned y)
{
    return unpack(y);
}

/*
 * Intentionally identical to wrapper(): the report compares the code
 * generated for the two.
 */
struct alpha wrapper2(unsigned y)
{
    return unpack(y);
}

/* One more level of forwarding, through wrapper(). */
struct alpha wrapperwrapper(unsigned y)
{
    return wrapper(y);
}


        .file   "foo.c"
# GNU C11 (GCC) version 5.3.0 (x86_64-unknown-linux-gnu)
#       compiled by GNU C version 5.3.0, GMP version 6.1.0, MPFR version 3.1.3-p5, MPC version 1.0.3
# GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
# options passed:  foo.c -mtune=generic -march=x86-64 -O3 -Wall
# -fverbose-asm
# options enabled:  -faggressive-loop-optimizations -falign-labels
# -fasynchronous-unwind-tables -fauto-inc-dec -fbranch-count-reg
# -fcaller-saves -fchkp-check-incomplete-type -fchkp-check-read
# -fchkp-check-write -fchkp-instrument-calls -fchkp-narrow-bounds
# -fchkp-optimize -fchkp-store-bounds -fchkp-use-static-bounds
# -fchkp-use-static-const-bounds -fchkp-use-wrappers
# -fcombine-stack-adjustments -fcommon -fcompare-elim -fcprop-registers
# -fcrossjumping -fcse-follow-jumps -fdefer-pop
# -fdelete-null-pointer-checks -fdevirtualize -fdevirtualize-speculatively
# -fdwarf2-cfi-asm -fearly-inlining -feliminate-unused-debug-types
# -fexpensive-optimizations -fforward-propagate -ffunction-cse -fgcse
# -fgcse-after-reload -fgcse-lm -fgnu-runtime -fgnu-unique
# -fguess-branch-probability -fhoist-adjacent-loads -fident -fif-conversion
# -fif-conversion2 -findirect-inlining -finline -finline-atomics
# -finline-functions -finline-functions-called-once
# -finline-small-functions -fipa-cp -fipa-cp-alignment -fipa-cp-clone
# -fipa-icf -fipa-icf-functions -fipa-icf-variables -fipa-profile
# -fipa-pure-const -fipa-ra -fipa-reference -fipa-sra -fira-hoist-pressure
# -fira-share-save-slots -fira-share-spill-slots
# -fisolate-erroneous-paths-dereference -fivopts -fkeep-static-consts
# -fleading-underscore -flifetime-dse -flra-remat -flto-odr-type-merging
# -fmath-errno -fmerge-constants -fmerge-debug-strings
# -fmove-loop-invariants -fomit-frame-pointer -foptimize-sibling-calls
# -foptimize-strlen -fpartial-inlining -fpeephole -fpeephole2
# -fpredictive-commoning -fprefetch-loop-arrays -free -freg-struct-return
# -freorder-blocks -freorder-blocks-and-partition -freorder-functions
# -frerun-cse-after-loop -fsched-critical-path-heuristic
# -fsched-dep-count-heuristic -fsched-group-heuristic -fsched-interblock
# -fsched-last-insn-heuristic -fsched-rank-heuristic -fsched-spec
# -fsched-spec-insn-heuristic -fsched-stalled-insns-dep -fschedule-fusion
# -fschedule-insns2 -fsemantic-interposition -fshow-column -fshrink-wrap
# -fsigned-zeros -fsplit-ivs-in-unroller -fsplit-wide-types -fssa-phiopt
# -fstdarg-opt -fstrict-aliasing -fstrict-overflow
# -fstrict-volatile-bitfields -fsync-libcalls -fthread-jumps
# -ftoplevel-reorder -ftrapping-math -ftree-bit-ccp -ftree-builtin-call-dce
# -ftree-ccp -ftree-ch -ftree-coalesce-vars -ftree-copy-prop
# -ftree-copyrename -ftree-cselim -ftree-dce -ftree-dominator-opts
# -ftree-dse -ftree-forwprop -ftree-fre -ftree-loop-distribute-patterns
# -ftree-loop-if-convert -ftree-loop-im -ftree-loop-ivcanon
# -ftree-loop-optimize -ftree-loop-vectorize -ftree-parallelize-loops=
# -ftree-partial-pre -ftree-phiprop -ftree-pre -ftree-pta -ftree-reassoc
# -ftree-scev-cprop -ftree-sink -ftree-slp-vectorize -ftree-slsr -ftree-sra
# -ftree-switch-conversion -ftree-tail-merge -ftree-ter -ftree-vrp
# -funit-at-a-time -funswitch-loops -funwind-tables -fverbose-asm
# -fzero-initialized-in-bss -m128bit-long-double -m64 -m80387
# -malign-stringops -mavx256-split-unaligned-load
# -mavx256-split-unaligned-store -mfancy-math-387 -mfp-ret-in-387 -mfxsr
# -mglibc -mieee-fp -mlong-double-80 -mmmx -mno-sse4 -mpush-args -mred-zone
# -msse -msse2 -mtls-direct-seg-refs -mvzeroupper

        # unpack: the union round-trip compiles to a single register move;
        # the argument in %edi is returned unchanged in %eax.
        # NOTE(review): the function label, `ret`, the .cfi_* directives, the
        # .L* local labels and the `.text` switches were absent from this copy
        # of the listing; they are restored here following the usual GCC 5
        # output layout -- confirm against the original attachment.
        .section        .text.unlikely,"ax",@progbits
.LCOLDB0:
        .text
.LHOTB0:
        .p2align 4,,15
        .globl  unpack
        .type   unpack, @function
unpack:
.LFB0:
        .cfi_startproc
        movl    %edi, %eax      # x, x
        ret
        .cfi_endproc
.LFE0:
        .size   unpack, .-unpack
        .section        .text.unlikely
.LCOLDE0:
        .text
.LHOTE0:
        # wrapper: the poorly optimized case.  Instead of one move, the value
        # is taken apart and reassembled byte by byte; the result left in
        # %eax is bit-for-bit equal to the argument y.
        # NOTE(review): the function label, `ret`, the .cfi_* directives, the
        # .L* local labels and the `.text` switches were absent from this copy
        # of the listing; they are restored here following the usual GCC 5
        # output layout -- confirm against the original attachment.
        .section        .text.unlikely
.LCOLDB1:
        .text
.LHOTB1:
        .p2align 4,,15
        .globl  wrapper
        .type   wrapper, @function
wrapper:
.LFB1:
        .cfi_startproc
        movl    %edi, %eax      # y, y
        xorl    %edx, %edx      # retval.9
        # Bits 8-15 of y (%ah), sign-extended, then bytes 0 and 1 are placed
        # into %dl and %dh.
        movsbl  %ah, %eax       # y, SR.14
        movb    %dil, %dl       # y, retval.9
        movb    %al, %dh        # SR.14, retval.9
        movl    %edi, %eax      # y, tmp101
        # -16777216 == 0xFF000000 keeps bits 24-31;
        # 16711680 == 0x00FF0000 keeps bits 16-23.
        andl    $-16777216, %edi        #, tmp105
        andl    $16711680, %eax #, tmp101
        movzwl  %dx, %edx       # retval.9, tmp103
        # OR the low 16 bits and the two masked upper bytes back together.
        orl     %eax, %edx      # tmp101, tmp106
        movl    %edx, %eax      # tmp106, tmp107
        orl     %edi, %eax      # tmp105, tmp107
        ret
        .cfi_endproc
.LFE1:
        .size   wrapper, .-wrapper
        .section        .text.unlikely
.LCOLDE1:
        .text
.LHOTE1:
        # wrapper2: same C source as wrapper, yet here the body is the
        # single register move (%edi -> %eax) also seen in unpack.
        # NOTE(review): the function label, `ret`, the .cfi_* directives, the
        # .L* local labels and the `.text` switches were absent from this copy
        # of the listing; they are restored here following the usual GCC 5
        # output layout -- confirm against the original attachment.
        .section        .text.unlikely
.LCOLDB2:
        .text
.LHOTB2:
        .p2align 4,,15
        .globl  wrapper2
        .type   wrapper2, @function
wrapper2:
.LFB2:
        .cfi_startproc
        movl    %edi, %eax      # y, y
        ret
        .cfi_endproc
.LFE2:
        .size   wrapper2, .-wrapper2
        .section        .text.unlikely
.LCOLDE2:
        .text
.LHOTE2:
        # wrapperwrapper: same byte-by-byte take-apart/reassemble sequence as
        # wrapper; the result left in %eax is bit-for-bit equal to y.
        # NOTE(review): the function label, `ret`, the .cfi_* directives, the
        # .L* local labels and the `.text` switches were absent from this copy
        # of the listing; they are restored here following the usual GCC 5
        # output layout -- confirm against the original attachment.
        .section        .text.unlikely
.LCOLDB3:
        .text
.LHOTB3:
        .p2align 4,,15
        .globl  wrapperwrapper
        .type   wrapperwrapper, @function
wrapperwrapper:
.LFB3:
        .cfi_startproc
        movl    %edi, %eax      # y, y
        xorl    %edx, %edx      # D.1859
        # Bits 8-15 of y (%ah), sign-extended, then bytes 0 and 1 are placed
        # into %dl and %dh.
        movsbl  %ah, %eax       # y, SR.5
        movb    %dil, %dl       # y, D.1859
        movb    %al, %dh        # SR.5, D.1859
        movl    %edi, %eax      # y, tmp101
        # -16777216 == 0xFF000000 keeps bits 24-31;
        # 16711680 == 0x00FF0000 keeps bits 16-23.
        andl    $-16777216, %edi        #, tmp105
        andl    $16711680, %eax #, tmp101
        movzwl  %dx, %edx       # D.1859, tmp103
        # OR the low 16 bits and the two masked upper bytes back together.
        orl     %eax, %edx      # tmp101, tmp106
        movl    %edx, %eax      # tmp106, tmp107
        orl     %edi, %eax      # tmp105, tmp107
        ret
        .cfi_endproc
.LFE3:
        .size   wrapperwrapper, .-wrapperwrapper
        .section        .text.unlikely
.LCOLDE3:
        .text
.LHOTE3:
        .ident  "GCC: (GNU) 5.3.0"
        .section        .note.GNU-stack,"",@progbits

