The following makes FRE optimize a load for which we formerly required
SRA + CCP; those passes now run only after we have gotten rid of all
__builtin_constant_p calls.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

2021-03-15  Richard Biener  <rguenther@suse.de>

        PR tree-optimization/98834
        * tree-ssa-sccvn.c (vn_reference_lookup_3): Handle missing
        subsetting by truncating the access size.

        * g++.dg/opt/pr98834.C: New testcase.
---
 gcc/testsuite/g++.dg/opt/pr98834.C | 71 ++++++++++++++++++++++++++++++
 gcc/tree-ssa-sccvn.c               | 12 ++++-
 2 files changed, 82 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.dg/opt/pr98834.C

diff --git a/gcc/testsuite/g++.dg/opt/pr98834.C 
b/gcc/testsuite/g++.dg/opt/pr98834.C
new file mode 100644
index 00000000000..fafd3227181
--- /dev/null
+++ b/gcc/testsuite/g++.dg/opt/pr98834.C
@@ -0,0 +1,71 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target c++17 } */
+/* { dg-options "-O2 -fdump-tree-fre3" } */
+
+struct _Base
+{ 
+  int _M_data = 0;
+};
+
+struct _Wrapper : _Base
+{ 
+  _Wrapper(int) {}
+
+  bool _M_is_constprop() { return __builtin_constant_p(_M_data); }
+};
+
+struct _Impl
+{ 
+  _Wrapper _S_multiplies(_Wrapper __x, _Wrapper __y)
+  { 
+    if (__x._M_is_constprop() || __y._M_is_constprop())
+      return __y;
+    return 0;
+  }
+};
+
+struct _TupleData
+{ 
+  _Wrapper first;
+  int second;
+};
+
+struct _Tuple : _TupleData
+{ 
+  template <typename _Fp>
+    _Tuple _M_apply_per_chunk(_Fp __fun, _Tuple __y)
+    { 
+      return {__fun(first, __y.first), second};
+    }
+};
+
+struct _ImplFixed
+{ 
+  static _Tuple _S_multiplies(_Tuple __x, _Tuple __y)
+  { 
+    return __x._M_apply_per_chunk(
+             []( auto __xx, auto __yy) {
+               return _Impl()._S_multiplies(__xx, __yy);
+             },
+             __y);
+  }
+};
+
+class simd
+{
+public:
+  [[__gnu__::__always_inline__]] friend simd operator*(simd __x, simd __y)
+  { return _ImplFixed::_S_multiplies(__x._M_data, __y._M_data); }
+
+  simd(_Tuple __init) : _M_data(__init) {}
+
+  _Tuple _M_data;
+};
+
+int main()
+{ 
+  simd({0, 0}) * simd({0, 0});
+}
+
+/* FRE3 should elide all conditionals in the remaining main.  */
+/* { dg-final { scan-tree-dump-times "<bb" 1 "fre3" } } */
diff --git a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c
index 65b3967b9e1..99759a8744a 100644
--- a/gcc/tree-ssa-sccvn.c
+++ b/gcc/tree-ssa-sccvn.c
@@ -3215,7 +3215,17 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void 
*data_,
        return (void *)-1;
       /* This can happen with bitfields.  */
       if (maybe_ne (ref->size, r.size))
-       return (void *)-1;
+       {
+         /* If the access lacks some subsetting simply apply that by
+            shortening it.  That in the end can only be successful
+            if we can pun the lookup result which in turn requires
+            exact offsets.  */
+         if (known_eq (r.size, r.max_size)
+             && known_lt (ref->size, r.size))
+           r.size = r.max_size = ref->size;
+         else
+           return (void *)-1;
+       }
       *ref = r;
 
       /* Do not update last seen VUSE after translating.  */
-- 
2.26.2

Reply via email to