https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111582
Bug ID: 111582 Summary: [arm-none-eabi-gcc] / suboptimal optimization / bitfield / superfluous stack write Product: gcc Version: 9.3.0 Status: UNCONFIRMED Severity: normal Priority: P3 Component: target Assignee: unassigned at gcc dot gnu.org Reporter: cptarse-luke at yahoo dot com Target Milestone: --- When I use a struct with a bitfield, GCC emits a write to the stack that is never read back: > arm-none-eabi-gcc -v Using built-in specs. COLLECT_GCC=arm-none-eabi-gcc COLLECT_LTO_WRAPPER=/usr/lib/gcc/arm-none-eabi/9.3.0/lto-wrapper Target: arm-none-eabi Configured with: ../configure --disable-decimal-float --disable-libffi --disable-libgomp --disable-libmudflap --disable-libquadmath --disable-libssp --disable-libstdcxx-pch --disable-libstdc__-v3 --disable-nls --disable-shared --disable-threads --disable-tls --disable-werror --enable-__cxa_atexit --enable-c99 --enable-gnu-indirect-function --enable-interwork --enable-languages=c,c++ --enable-long-long --enable-multilib --enable-plugins --host= --libdir=/usr/lib --libexecdir=/usr/lib --prefix=/usr --target=arm-none-eabi --with-gmp --with-gnu-as --with-gnu-ld --with-headers=/usr/arm-none-eabi/include --with-host-libstdcxx='-static-libgcc -Wl,-Bstatic,-lstdc++,-Bdynamic -lm' --with-isl --with-libelf --with-mpc --with-mpfr --with-multilib-list=rmprofile --with-native-system-header-dir=/include --with-newlib --with-python-dir=share/gcc-arm-none-eabi --with-sysroot=/usr/arm-none-eabi --with-system-zlib Thread model: single gcc version 9.3.0 (GCC) # arm-none-eabi-gcc -save-temps -S a.c -O3 -g -mcpu=cortex-m0plus -mthumb -Wall --specs=nosys.specs -nostdlib -fdata-sections -ffunction-sections -ffreestanding -Winline > cat a.i # 1 "a.c" # 1 "/tmp//" # 1 "<built-in>" # 1 "<command-line>" # 1 "a.c" typedef unsigned char u8; typedef unsigned int u32; extern int fatal(); __attribute__((always_inline)) inline u32 lsb(const u8 l) { return (1U<<l)-1U; } typedef struct { u32 a; u32 msk; u32 v; u8 rs:1; u8 aw:7; 
} Reg; __attribute__((always_inline)) inline Reg GI(u32 A, u32 N, u32 RS, u8 AW) { Reg R={A+4*N,lsb(32),0,RS?1:0,AW}; return R; } __attribute__((always_inline)) inline u32 GS(Reg R) { for (u32 i=0, msk=lsb(8); R.aw==1 && msk; i++, msk<<=8) if ( !~(R.msk | msk) ) { const u8 v = R.v >> (i*8); if (R.rs || msk==~R.msk) return (((volatile u8*)R.a)[i] = v) << (i*8); else if (R.v==~R.msk) return (((volatile u8*)R.a)[i] |= v) << (i*8); return (((volatile u8*)R.a)[i] = (((volatile u8*)R.a)[i] & (R.msk>>(i*8))) | v) << (i*8); } return 0; } __attribute__((always_inline)) inline Reg GU(Reg R, u32 A, u32 N, u8 o, u8 w, u32 v) { const u32 msk=~(lsb(w)<<o); R.msk&=msk; R.v&=msk; R.v |= (v<<o); return R; } u32 artiSP() { return GS(GU(GI(0xE0000000,42,0,1),0xE0000000,42,17,3,2)); } > cat a.s artiSP: sub sp, sp, #16 mov r2, sp movs r3, #2 strb r3, [r2, #12] ... add sp, sp, #16 bx lr I compile it on an Intel(R) Pentium(R) Silver J5040 CPU @ 2.00GHz running Void Linux (kernel: 6.3.13_1), targeting an STM32G030.