https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80813
Jan Hubicka <hubicka at gcc dot gnu.org> changed:
What |Removed |Added
----------------------------------------------------------------------------
Status|UNCONFIRMED |NEW
Summary|x86: std::vector<bool>::operator[] could be somewhat faster using BT instead of SHL |[12/13/14/15 Regression] x86: std::vector<bool>::operator[] could be somewhat faster using BT instead of SHL
Ever confirmed|0 |1
Last reconfirmed| |2024-12-20
--- Comment #2 from Jan Hubicka <hubicka at gcc dot gnu.org> ---
Trunk currently produces:
f(std::vector<bool, std::allocator<bool> > const&, unsigned long):
testq %rsi, %rsi
leaq 63(%rsi), %rax
movq (%rdi), %rdx
cmovns %rsi, %rax
sarq $6, %rax
leaq (%rdx,%rax,8), %rdx
movq %rsi, %rax
sarq $63, %rax
shrq $58, %rax
addq %rax, %rsi
andl $63, %esi
subq %rax, %rsi
jns .L2
addq $64, %rsi
subq $8, %rdx
.L2:
movl $1, %eax
shlx %rsi, %rax, %rax
andq (%rdx), %rax
setne %al
ret
Removing basic block 5
bool f (const struct vector & v, size_t x)
{
difference_type __n;
_Bit_type * const SR.16;
_Bit_type * _4;
long int __n.0_5;
long unsigned int _12;
long unsigned int _13;
long unsigned int _14;
bool _15;
long int _16;
long int _20;
long unsigned int _21;
long unsigned int _22;
_Bit_type * _23;
_Bit_type * _26;
unsigned int _42;
<bb 2> [local count: 1073741824]:
_4 = v_2(D)->D.25666._M_impl.D.25135._M_start.D.16486._M_p;
__n.0_5 = (long int) x_3(D);
_20 = __n.0_5 / 64;
_21 = (long unsigned int) _20;
_22 = _21 * 8;
_23 = _4 + _22;
__n_24 = __n.0_5 % 64;
if (__n_24 < 0)
goto <bb 3>; [41.00%]
else
goto <bb 4>; [59.00%]
<bb 3> [local count: 440234144]:
__n_25 = __n_24 + 64;
_26 = _23 + 18446744073709551608;
<bb 4> [local count: 1073741824]:
# SR.16_41 = PHI <_26(3), _23(2)>
# _16 = PHI <__n_25(3), __n_24(2)>
_42 = (unsigned int) _16;
_12 = 1 << _42;
_13 = *SR.16_41;
_14 = _12 & _13;
_15 = _14 != 0;
return _15;
}
This is a regression relative to GCC 7, which produces more reasonable code:
f(std::vector<bool, std::allocator<bool> > const&, unsigned long):
movq (%rdi), %rdx
movq %rsi, %rcx
movq %rsi, %rax
movl $1, %esi
shrq $6, %rcx
shlx %rax, %rsi, %rsi
andq (%rdx,%rcx,8), %rsi
setne %al
ret
clang:
f(std::vector<bool, std::allocator<bool>> const&, unsigned long):
leaq 63(%rsi), %rax
testq %rsi, %rsi
cmovnsq %rsi, %rax
sarq $6, %rax
shlq $3, %rax
addq (%rdi), %rax
movabsq $-9223372036854775808, %rcx
leaq 63(%rcx), %rdx
andq %rsi, %rdx
xorl %edi, %edi
cmpq %rcx, %rdx
setbe %dil
movq -8(%rax,%rdi,8), %rax
btq %rsi, %rax
setb %al
retq
clang with libc++:
f(std::__1::vector<bool, std::__1::allocator<bool>> const&, unsigned long):
movq (%rdi), %rax
movq %rsi, %rcx
shrq $6, %rcx
movq (%rax,%rcx,8), %rax
btq %rsi, %rax
setb %al
retq