The following makes FRE optimize a load for which we formerly required SRA + CCP — passes which now run after we get rid of all __builtin_constant_p calls.
Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed. 2021-03-15 Richard Biener <rguent...@suse.de> PR tree-optimization/98834 * tree-ssa-sccvn.c (vn_reference_lookup_3): Handle missing subsetting by truncating the access size. * g++.dg/opt/pr98834.C: New testcase. --- gcc/testsuite/g++.dg/opt/pr98834.C | 71 ++++++++++++++++++++++++++++++ gcc/tree-ssa-sccvn.c | 12 ++++- 2 files changed, 82 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/g++.dg/opt/pr98834.C diff --git a/gcc/testsuite/g++.dg/opt/pr98834.C b/gcc/testsuite/g++.dg/opt/pr98834.C new file mode 100644 index 00000000000..fafd3227181 --- /dev/null +++ b/gcc/testsuite/g++.dg/opt/pr98834.C @@ -0,0 +1,71 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target c++17 } */ +/* { dg-options "-O2 -fdump-tree-fre3" } */ + +struct _Base +{ + int _M_data = 0; +}; + +struct _Wrapper : _Base +{ + _Wrapper(int) {} + + bool _M_is_constprop() { return __builtin_constant_p(_M_data); } +}; + +struct _Impl +{ + _Wrapper _S_multiplies(_Wrapper __x, _Wrapper __y) + { + if (__x._M_is_constprop() || __y._M_is_constprop()) + return __y; + return 0; + } +}; + +struct _TupleData +{ + _Wrapper first; + int second; +}; + +struct _Tuple : _TupleData +{ + template <typename _Fp> + _Tuple _M_apply_per_chunk(_Fp __fun, _Tuple __y) + { + return {__fun(first, __y.first), second}; + } +}; + +struct _ImplFixed +{ + static _Tuple _S_multiplies(_Tuple __x, _Tuple __y) + { + return __x._M_apply_per_chunk( + []( auto __xx, auto __yy) { + return _Impl()._S_multiplies(__xx, __yy); + }, + __y); + } +}; + +class simd +{ +public: + [[__gnu__::__always_inline__]] friend simd operator*(simd __x, simd __y) + { return _ImplFixed::_S_multiplies(__x._M_data, __y._M_data); } + + simd(_Tuple __init) : _M_data(__init) {} + + _Tuple _M_data; +}; + +int main() +{ + simd({0, 0}) * simd({0, 0}); +} + +/* FRE3 should elide all conditionals in the remaining main. 
*/ +/* { dg-final { scan-tree-dump-times "<bb" 1 "fre3" } } */ diff --git a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c index 65b3967b9e1..99759a8744a 100644 --- a/gcc/tree-ssa-sccvn.c +++ b/gcc/tree-ssa-sccvn.c @@ -3215,7 +3215,17 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, return (void *)-1; /* This can happen with bitfields. */ if (maybe_ne (ref->size, r.size)) - return (void *)-1; + { + /* If the access lacks some subsetting simply apply that by + shortening it. That in the end can only be successful + if we can pun the lookup result which in turn requires + exact offsets. */ + if (known_eq (r.size, r.max_size) + && known_lt (ref->size, r.size)) + r.size = r.max_size = ref->size; + else + return (void *)-1; + } *ref = r; /* Do not update last seen VUSE after translating. */ -- 2.26.2