https://gcc.gnu.org/bugzilla/show_bug.cgi?id=118888
Bug ID: 118888
Summary: GCC optimizes only 1 bit-manipulation function out of
many despite all of them having the same implementation.
Product: gcc
Version: 14.2.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: tree-optimization
Assignee: unassigned at gcc dot gnu.org
Reporter: evanhyd2003 at gmail dot com
Target Milestone: ---
Created attachment 60504
--> https://gcc.gnu.org/bugzilla/attachment.cgi?id=60504&action=edit
Compile this source code with -S and -O3 to observe its assembly output.
gcc version 14.2.0 (GCC)
Target: x86_64-w64-mingw32
COLLECT_GCC=C:\Users\evanh\works\coding\w64devkit\bin\gcc.exe
COLLECT_LTO_WRAPPER=C:/Users/evanh/works/coding/w64devkit/bin/../libexec/gcc/x86_64-w64-mingw32/14.2.0/lto-wrapper.exe
Configured with: /gcc-14.2.0/configure --prefix=/w64devkit
--with-sysroot=/w64devkit/x86_64-w64-mingw32
--with-native-system-header-dir=/include --target=x86_64-w64-mingw32
--host=x86_64-w64-mingw32 --enable-static --disable-shared --with-pic
--with-gmp-include=/deps/include --with-gmp-lib=/deps/lib
--with-mpc-include=/deps/include --with-mpc-lib=/deps/lib
--with-mpfr-include=/deps/include --with-mpfr-lib=/deps/lib
--enable-languages=c,c++,fortran --enable-libgomp --enable-threads=posix
--enable-version-specific-runtime-libs --disable-dependency-tracking
--disable-lto --disable-multilib --disable-nls --disable-win32-registry
--enable-mingw-wildcard CFLAGS_FOR_TARGET=-Os CXXFLAGS_FOR_TARGET=-Os
LDFLAGS_FOR_TARGET=-s CFLAGS=-Os CXXFLAGS=-Os LDFLAGS=-s
Thread model: posix
Supported LTO compression algorithms: zlib
Command to gcc that triggers the bug: g++ -S .\bug.cpp -O3
Expected behavior:
All versions of setSquare() should get optimized to assembly code similar
to setSquare3()'s.
Actual behavior:
Only setSquare3()'s assembly code gets optimized, despite it having the same
implementation as the other setSquare() versions. In addition, the optimization
breaks when you shuffle the function order: the correct optimization is
applied to only 1 function at a time.
.file "bug.cpp"
.text
# ---------------------------------------------------------------------
# _Z10setSquare1yjjjj
#   Demangles to setSquare1(unsigned long long, unsigned int,
#   unsigned int, unsigned int, unsigned int).
# Syntax: AT&T (GAS).  ABI: Microsoft x64 (target x86_64-w64-mingw32).
# In:    rcx = arg1 (64-bit), edx = arg2, r8d = arg3, r9d = arg4,
#        40(%rsp) = arg5 (fifth argument, passed on the stack)
# Out:   rax = arg1 | (1 << arg2) | (1 << arg3) | (1 << arg4) | (1 << arg5)
#        (shift counts and the bit index are taken modulo 64 by the CPU)
# Clobb: rcx, r9, r10, r11, flags
# Note:  non-optimized shape -- three shift-by-%cl sequences plus a single
#        btsq, compared with the four btsq used by _Z10setSquare3yjjjj.
# ---------------------------------------------------------------------
.p2align 4
.globl _Z10setSquare1yjjjj
.def _Z10setSquare1yjjjj; .scl 2; .type 32; .endef
.seh_proc _Z10setSquare1yjjjj
_Z10setSquare1yjjjj:
.LFB31:
.seh_endprologue
# rax holds the constant 1 that every shifted mask below is copied from.
movl $1, %eax
movq %rax, %r11
# Variable shifts take their count in %cl, so arg1 is moved out of rcx first.
movq %rcx, %r10
# r11 = mask for arg4 (one shifted left by arg4).
movl %r9d, %ecx
salq %cl, %r11
# r9 = mask(arg4) with bit arg5 also set; r11 is reloaded with 1 in between.
movl 40(%rsp), %ecx
movq %r11, %r9
movq %rax, %r11
btsq %rcx, %r9
# r11 = mask for arg3.
movl %r8d, %ecx
salq %cl, %r11
# ecx = arg2 is loaded ahead of the final shift; mask(arg3) is merged into r9.
movl %edx, %ecx
orq %r11, %r9
# rax = mask for arg2.
salq %cl, %rax
# Fold in the original arg1, then combine everything into the return value.
orq %r10, %r9
orq %r9, %rax
ret
.seh_endproc
# ---------------------------------------------------------------------
# _Z10setSquare2yjjjj
#   Demangles to setSquare2(unsigned long long, unsigned int,
#   unsigned int, unsigned int, unsigned int).
# Syntax: AT&T (GAS).  ABI: Microsoft x64 (target x86_64-w64-mingw32).
# In:    rcx = arg1 (64-bit), edx = arg2, r8d = arg3, r9d = arg4,
#        40(%rsp) = arg5 (fifth argument, passed on the stack)
# Out:   rax = arg1 | (1 << arg2) | (1 << arg3) | (1 << arg4) | (1 << arg5)
#        (shift counts and the bit index are taken modulo 64 by the CPU)
# Clobb: rcx, r9, r10, r11, flags
# Note:  non-optimized shape; computes the same value as
#        _Z10setSquare1yjjjj but ORs arg1 in earlier and reuses r10 as the
#        scratch register for the arg3 mask.
# ---------------------------------------------------------------------
.p2align 4
.globl _Z10setSquare2yjjjj
.def _Z10setSquare2yjjjj; .scl 2; .type 32; .endef
.seh_proc _Z10setSquare2yjjjj
_Z10setSquare2yjjjj:
.LFB32:
.seh_endprologue
# rax holds the constant 1 that every shifted mask below is copied from.
movl $1, %eax
movq %rax, %r11
# Variable shifts take their count in %cl, so arg1 is moved out of rcx first.
movq %rcx, %r10
# r11 = mask for arg4 (one shifted left by arg4).
movl %r9d, %ecx
salq %cl, %r11
# r9 = mask(arg4) with bit arg5 also set.
movl 40(%rsp), %ecx
movq %r11, %r9
btsq %rcx, %r9
# ecx = arg3 for the next shift; arg1 is merged into r9 now, freeing r10.
movl %r8d, %ecx
orq %r10, %r9
# r10 = mask for arg3.
movq %rax, %r10
salq %cl, %r10
# ecx = arg2 is loaded ahead of the final shift; mask(arg3) is merged into r9.
movl %edx, %ecx
orq %r10, %r9
# rax = mask for arg2, then combined with everything accumulated in r9.
salq %cl, %rax
orq %r9, %rax
ret
.seh_endproc
# ---------------------------------------------------------------------
# _Z10setSquare3yjjjj
#   Demangles to setSquare3(unsigned long long, unsigned int,
#   unsigned int, unsigned int, unsigned int).
# Syntax: AT&T (GAS).  ABI: Microsoft x64 (target x86_64-w64-mingw32).
# In:    rcx = arg1 (64-bit), edx = arg2, r8d = arg3, r9d = arg4,
#        40(%rsp) = arg5 (fifth argument, passed on the stack)
# Out:   rax = arg1 with bits arg2, arg3, arg4 and arg5 set
#        (btsq with a register destination takes the bit index modulo 64)
# Clobb: rcx, flags
# Note:  this is the optimized form the report expects for every variant:
#        one btsq per bit, no shift-by-%cl and no scratch masks.
# ---------------------------------------------------------------------
.p2align 4
.globl _Z10setSquare3yjjjj
.def _Z10setSquare3yjjjj; .scl 2; .type 32; .endef
.seh_proc _Z10setSquare3yjjjj
_Z10setSquare3yjjjj:
.LFB33:
.seh_endprologue
# arg5 is the only index not already in a register; load it into eax.
movl 40(%rsp), %eax
# Set bits arg5, arg4 and arg3 directly in rcx, which still holds arg1.
btsq %rax, %rcx
btsq %r9, %rcx
btsq %r8, %rcx
# Move the accumulated value to the return register and set the arg2 bit.
movq %rcx, %rax
btsq %rdx, %rax
ret
.seh_endproc
# ---------------------------------------------------------------------
# _Z10setSquare4yjjjj
#   Demangles to setSquare4(unsigned long long, unsigned int,
#   unsigned int, unsigned int, unsigned int).
# Syntax: AT&T (GAS).  ABI: Microsoft x64 (target x86_64-w64-mingw32).
# In:    rcx = arg1 (64-bit), edx = arg2, r8d = arg3, r9d = arg4,
#        40(%rsp) = arg5 (fifth argument, passed on the stack)
# Out:   rax = arg1 | (1 << arg2) | (1 << arg3) | (1 << arg4) | (1 << arg5)
#        (shift counts and the bit index are taken modulo 64 by the CPU)
# Clobb: rcx, r10, r11, flags
# Note:  non-optimized shape; accumulates directly in rax and keeps the
#        constant 1 in r10 instead of rax.
# ---------------------------------------------------------------------
.p2align 4
.globl _Z10setSquare4yjjjj
.def _Z10setSquare4yjjjj; .scl 2; .type 32; .endef
.seh_proc _Z10setSquare4yjjjj
_Z10setSquare4yjjjj:
.LFB34:
.seh_endprologue
# r10 holds the constant 1; rax starts as a copy and becomes the accumulator.
movl $1, %r10d
movq %r10, %rax
# Variable shifts take their count in %cl, so arg1 is moved out of rcx first.
movq %rcx, %r11
# rax = mask for arg4 (one shifted left by arg4).
movl %r9d, %ecx
salq %cl, %rax
# Set bit arg5 in the accumulator.
movl 40(%rsp), %ecx
btsq %rcx, %rax
# ecx = arg3 for the next shift; arg1 is merged in now, freeing r11.
movl %r8d, %ecx
orq %r11, %rax
# r11 = mask for arg3.
movq %r10, %r11
salq %cl, %r11
# ecx = arg2 is loaded ahead of the final shift; mask(arg3) is merged in.
movl %edx, %ecx
orq %r11, %rax
# r10 = mask for arg2, merged last to form the return value.
salq %cl, %r10
orq %r10, %rax
ret
.seh_endproc
# ---------------------------------------------------------------------
# _Z10setSquare5yjjjj
#   Demangles to setSquare5(unsigned long long, unsigned int,
#   unsigned int, unsigned int, unsigned int).
# Syntax: AT&T (GAS).  ABI: Microsoft x64 (target x86_64-w64-mingw32).
# In:    rcx = arg1 (64-bit), edx = arg2, r8d = arg3, r9d = arg4,
#        40(%rsp) = arg5 (fifth argument, passed on the stack)
# Out:   rax = arg1 | (1 << arg2) | (1 << arg3) | (1 << arg4) | (1 << arg5)
#        (shift counts and the bit index are taken modulo 64 by the CPU)
# Clobb: rcx, r9, r10, r11, flags
# Note:  non-optimized shape; the instruction sequence is the same as the
#        one emitted for _Z10setSquare1yjjjj.
# ---------------------------------------------------------------------
.p2align 4
.globl _Z10setSquare5yjjjj
.def _Z10setSquare5yjjjj; .scl 2; .type 32; .endef
.seh_proc _Z10setSquare5yjjjj
_Z10setSquare5yjjjj:
.LFB41:
.seh_endprologue
# rax holds the constant 1 that every shifted mask below is copied from.
movl $1, %eax
movq %rax, %r11
# Variable shifts take their count in %cl, so arg1 is moved out of rcx first.
movq %rcx, %r10
# r11 = mask for arg4 (one shifted left by arg4).
movl %r9d, %ecx
salq %cl, %r11
# r9 = mask(arg4) with bit arg5 also set; r11 is reloaded with 1 in between.
movl 40(%rsp), %ecx
movq %r11, %r9
movq %rax, %r11
btsq %rcx, %r9
# r11 = mask for arg3.
movl %r8d, %ecx
salq %cl, %r11
# ecx = arg2 is loaded ahead of the final shift; mask(arg3) is merged into r9.
movl %edx, %ecx
orq %r11, %r9
# rax = mask for arg2.
salq %cl, %rax
# Fold in the original arg1, then combine everything into the return value.
orq %r10, %r9
orq %r9, %rax
ret
.seh_endproc
# ---------------------------------------------------------------------
# _Z10setSquare6yjjjj
#   Demangles to setSquare6(unsigned long long, unsigned int,
#   unsigned int, unsigned int, unsigned int).
# Syntax: AT&T (GAS).  ABI: Microsoft x64 (target x86_64-w64-mingw32).
# In:    rcx = arg1 (64-bit), edx = arg2, r8d = arg3, r9d = arg4,
#        40(%rsp) = arg5 (fifth argument, passed on the stack)
# Out:   rax = arg1 | (1 << arg2) | (1 << arg3) | (1 << arg4) | (1 << arg5)
#        (shift counts and the bit index are taken modulo 64 by the CPU)
# Clobb: rcx, r9, r10, r11, flags
# Note:  non-optimized shape; the instruction sequence is the same as the
#        one emitted for _Z10setSquare1yjjjj.
# ---------------------------------------------------------------------
.p2align 4
.globl _Z10setSquare6yjjjj
.def _Z10setSquare6yjjjj; .scl 2; .type 32; .endef
.seh_proc _Z10setSquare6yjjjj
_Z10setSquare6yjjjj:
.LFB39:
.seh_endprologue
# rax holds the constant 1 that every shifted mask below is copied from.
movl $1, %eax
movq %rax, %r11
# Variable shifts take their count in %cl, so arg1 is moved out of rcx first.
movq %rcx, %r10
# r11 = mask for arg4 (one shifted left by arg4).
movl %r9d, %ecx
salq %cl, %r11
# r9 = mask(arg4) with bit arg5 also set; r11 is reloaded with 1 in between.
movl 40(%rsp), %ecx
movq %r11, %r9
movq %rax, %r11
btsq %rcx, %r9
# r11 = mask for arg3.
movl %r8d, %ecx
salq %cl, %r11
# ecx = arg2 is loaded ahead of the final shift; mask(arg3) is merged into r9.
movl %edx, %ecx
orq %r11, %r9
# rax = mask for arg2.
salq %cl, %rax
# Fold in the original arg1, then combine everything into the return value.
orq %r10, %r9
orq %r9, %rax
ret
.seh_endproc
.ident "GCC: (GNU) 14.2.0"
Compiler Explorer: https://godbolt.org/z/GnbKzd33s