https://gcc.gnu.org/bugzilla/show_bug.cgi?id=122470

            Bug ID: 122470
           Summary: Suboptimal code generation for assignment of struct
                    with bitfields
           Product: gcc
           Version: 16.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: slash.tmp at free dot fr
  Target Milestone: ---

#include <stdint.h>
/* Two unsigned bit-fields whose widths (8 + 24) add up to 32 bits. */
struct s1 {
        uint32_t f_1 :  8;  /* 8-bit field, declared first */
        uint32_t f_2 : 24;  /* 24-bit field, declared second */
};
/* Overlays struct s1 with a plain uint32_t so the same storage can be
 * accessed either field-by-field or as a single word. */
union u1 {
        struct s1 s1;   /* bit-field view */
        uint32_t raw;   /* whole-word view */
};
/* Assigns only f_2 through the pointer; f_1 is not written, so its stored
 * value has to be preserved. */
void foo1(struct s1 *out, unsigned u) { out->f_2 = u; }
/* Assigns the whole struct from a compound literal: f_1 = 0, f_2 = u. */
void foo2(struct s1 *out, unsigned u) { *out = (struct s1){ 0, u }; }
/* Builds { f_1 = 0, f_2 = u } in a named local, then copies it to *out. */
void foo3(struct s1 *out, unsigned u) { struct s1 vv = { 0, u }; *out = vv; }
/* Writes the whole word directly as u << 8; no bit-field member is accessed.
 * NOTE(review): this yields the same stored value as { f_1 = 0, f_2 = u }
 * only where f_1 occupies the low 8 bits of raw. Bit-field layout is
 * implementation-defined; confirm for the target in question. */
void foo4(union u1 *out, unsigned u) { out->raw = (u<<8); }
/* Builds { f_1 = 0, f_2 = u } in a named local union, then stores it to *out
 * through the raw (whole-word) member. */
void foo5(union u1 *out, unsigned u) { union u1 vv = {{ 0, u }}; out->raw =
vv.raw; }
/* Builds { f_1 = 0, f_2 = u } as a union compound literal and stores its raw
 * (whole-word) member to *out. */
void foo6(union u1 *out, unsigned u) { out->raw = (union u1){{ 0, u }}.raw; }

Using godbolt.org with gcc-trunk -O3

foo1:
        movl    %esi, %eax
        movzbl  (%rdi), %esi
        sall    $8, %eax
        orl     %eax, %esi
        movl    %esi, (%rdi)
        ret
foo2:
        movb    $0, (%rdi)
        movl    %esi, %eax
        movzbl  (%rdi), %esi
        sall    $8, %eax
        orl     %eax, %esi
        movl    %esi, (%rdi)
        ret
foo3:
        movb    $0, (%rdi)
        movl    %esi, %eax
        movzbl  (%rdi), %esi
        sall    $8, %eax
        orl     %eax, %esi
        movl    %esi, (%rdi)
        ret
foo4:
        sall    $8, %esi
        movl    %esi, (%rdi)
        ret
foo5:
        sall    $8, %esi
        movl    %esi, (%rdi)
        ret
foo6:
        sall    $8, %esi
        movl    %esi, (%rdi)
        ret


In foo1, gcc must merge f_1 & f_2 before storing the result.
It seems generated code could be (very slightly) improved by not using %esi for
f_1.

foo1:
        movzbl  (%rdi), %eax
        sall    $8, %esi
        orl     %eax, %esi
        movl    %esi, (%rdi)
        ret

In the remaining functions (foo2 to foo6), f_1 is overwritten with 0 => no merging required.

foo2 & foo3 are semantically equivalent (compound literal vs. named local).
Generated code is a mess.
GCC first stores f_1 = 0, then immediately loads that same byte back from memory.
(Pointless 8-bit store + 8-bit load)

Expecting foo2 and foo3 to compile to the same code as foo{4,5,6}

Reply via email to