https://gcc.gnu.org/bugzilla/show_bug.cgi?id=119042
Bug ID: 119042
Summary: Optimize more !struct.x && !struct.y codegen cases
Product: gcc
Version: 15.0
Status: UNCONFIRMED
Keywords: missed-optimization
Severity: normal
Priority: P3
Component: tree-optimization
Assignee: unassigned at gcc dot gnu.org
Reporter: ktkachov at gcc dot gnu.org
Target Milestone: ---
Target: aarch64
Taken from the LLVM report: https://github.com/llvm/llvm-project/issues/128778
The cases apply to GCC as well:
#include <cstdint>
// Reproducer aggregate #1: two ordinary (non-bit-field) bool members.
struct S1 {
bool x;
bool y;
};
// Reproducer aggregate #2: two 1-bit bit-fields declared with type bool.
struct S2 {
bool x : 1;
bool y : 1;
};
// Reproducer aggregate #3: two 1-bit bit-fields declared with type uint8_t.
struct S3 {
uint8_t x : 1;
uint8_t y : 1;
};
// Reproducer aggregate #4: two ordinary (non-bit-field) uint8_t members.
struct S4 {
uint8_t x ;
uint8_t y ;
};
// Test functions, one srcN/tgtN pair per struct SN, each taking the struct by
// value and returning true only when both members are zero/false.
//   srcN spells the check with logical negation:  !it.x && !it.y
//   tgtN spells it with explicit comparisons:     (it.x == 0) && (it.y == 0)
// The two spellings in each pair test the same condition, so their generated
// code can be compared directly.
// C linkage keeps the symbol names unmangled (src1, tgt1, ...).
extern "C" {
// Pair 1: S1, plain bool members.
auto src1(S1 it) -> bool { return !it.x && !it.y; }
auto tgt1(S1 it) -> bool { return (it.x == 0) && (it.y == 0); }
// Pair 2: S2, 1-bit bool bit-fields.
auto src2(S2 it) -> bool { return !it.x && !it.y; }
auto tgt2(S2 it) -> bool { return (it.x == 0) && (it.y == 0); }
// Pair 3: S3, 1-bit uint8_t bit-fields.
auto src3(S3 it) -> bool { return !it.x && !it.y; }
auto tgt3(S3 it) -> bool { return (it.x == 0) && (it.y == 0); }
// Pair 4: S4, plain uint8_t members.
auto src4(S4 it) -> bool { return !it.x && !it.y; }
auto tgt4(S4 it) -> bool { return (it.x == 0) && (it.y == 0); }
}
On aarch64, for example, GCC with -O3 generates:
src1:
ubfx x1, x0, 8, 8
tst x0, 1
eor w1, w1, 1
csel w0, w1, wzr, eq
ret
tgt1:
ubfx x1, x0, 8, 8
tst x0, 1
eor w1, w1, 1
csel w0, w1, wzr, eq
ret
src2:
ubfx x1, x0, 1, 1
tst x0, 1
eor w1, w1, 1
and w1, w1, 255
csel w0, w1, wzr, eq
ret
tgt2:
ubfx x1, x0, 0, 1
ubfx x0, x0, 1, 1
orr w0, w1, w0
eor w0, w0, 1
ret
src3:
tst x0, 3
cset w0, eq
ret
tgt3:
tst x0, 3
cset w0, eq
ret
src4:
ubfx x1, x0, 8, 8
orr w1, w1, w0
tst w1, 255
cset w0, eq
ret
tgt4:
ubfx x1, x0, 8, 8
orr w1, w1, w0
tst w1, 255
cset w0, eq
ret
The ubfx and orr/eor instructions can probably be optimised away.
I'm not yet sure whether this needs to be done in the target or in the initial GIMPLE
lowering of the conditionals, so I am marking it as tree-optimization initially.