Made the change and also created the ChangeLogs.
gcc/ChangeLog:
* tree-if-conv.cc (if_convertible_loop_p_1): Move ordering of
loop bb's from here...
(tree_if_conversion): ... to here. Also call bitfield lowering
when appropriate.
(version_loop_for_if_conversion): Adapt to enable loop versioning
when we only need to lower bitfields.
(ifcvt_split_critical_edges): Relax condition of expected loop
form as this is checked earlier.
(get_bitfield_rep): New function.
(lower_bitfield): Likewise.
(bitfields_to_lower_p): Likewise.
(need_to_lower_bitfields): New global boolean.
(need_to_ifcvt): Likewise.
* tree-vect-data-refs.cc (vect_find_stmt_data_reference):
Improve diagnostic message.
* tree-vect-patterns.cc (vect_recog_temp_ssa_var): Add default
value for last parameter.
(vect_recog_bitfield_ref_pattern): New.
(vect_recog_bit_insert_pattern): New.
gcc/testsuite/ChangeLog:
* gcc.dg/vect/vect-bitfield-read-1.c: New test.
* gcc.dg/vect/vect-bitfield-read-2.c: New test.
* gcc.dg/vect/vect-bitfield-read-3.c: New test.
* gcc.dg/vect/vect-bitfield-read-4.c: New test.
* gcc.dg/vect/vect-bitfield-read-5.c: New test.
* gcc.dg/vect/vect-bitfield-read-6.c: New test.
* gcc.dg/vect/vect-bitfield-write-1.c: New test.
* gcc.dg/vect/vect-bitfield-write-2.c: New test.
* gcc.dg/vect/vect-bitfield-write-3.c: New test.
* gcc.dg/vect/vect-bitfield-write-4.c: New test.
* gcc.dg/vect/vect-bitfield-write-5.c: New test.
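For reference, the kind of loop this series is about is what the new
vect-bitfield-read-* tests below exercise; a minimal sketch (names are
purely illustrative):

    struct s { int i : 31; };

    int
    sum (struct s *ptr, unsigned n)
    {
      int res = 0;
      for (unsigned i = 0; i < n; ++i)
        /* Previously rejected with "statement is bitfield access"; with
           this series if-conversion lowers the access to a load of the
           representative plus a BIT_FIELD_REF the vectorizer can handle.  */
        res += ptr[i].i;
      return res;
    }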
On 28/09/2022 10:43, Andre Vieira (lists) via Gcc-patches wrote:
On 27/09/2022 13:34, Richard Biener wrote:
On Mon, 26 Sep 2022, Andre Vieira (lists) wrote:
On 08/09/2022 12:51, Richard Biener wrote:
I'm curious, why the push to redundant_ssa_names? That could use
a comment ...
So I purposefully left a #if 0 #else #endif in there so you can see the
two options. But the reason I used redundant_ssa_names is because ifcvt
seems to use that as a container for all pairs of (old, new) ssa names
to replace later, so I just piggybacked on that. I don't know if there's
a specific reason they do the replacement at the end? Maybe some
ordering issue? Either way, both adding it to redundant_ssa_names and
doing the replacement inline work for the bitfield lowering (or work in
my testing at least).
Possibly because we (in the past?) inserted/copied stuff based on
predicates generated at analysis time after we decide to elide something
so we need to watch for later appearing uses. But who knows ... my mind
fails me here.
If it works to replace uses immediately please do so. But now
I wonder why we need this - the value shouldn't change so you
should get away with re-using the existing SSA name for the final value?
Yeah... good point. A quick change and minor testing seems to agree.
I'm sure I had a good reason to do it initially ;)
I'll run a full-regression on this change to make sure I didn't miss
anything.
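To make the intended transform concrete before the patch itself: a rough
scalar sketch of what the read path boils down to after the ifcvt lowering
plus the new bitfield_ref pattern (illustrative only; assumes a
little-endian layout and a bitfield narrower than its 32-bit
representative):

    /* rep is the loaded DECL_BIT_FIELD_REPRESENTATIVE word; bitpos is the
       field's offset relative to it and bitsize its DECL_SIZE.  */
    static inline unsigned
    read_bitfield (unsigned rep, unsigned bitpos, unsigned bitsize)
    {
      /* BIT_FIELD_REF (rep, bitsize, bitpos) becomes shift + mask.  */
      return (rep >> bitpos) & ((1u << bitsize) - 1);
    }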
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-1.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-1.c
new file mode 100644
index 0000000000000000000000000000000000000000..01cf34fb44484ca926ca5de99eef76dd99b69e92
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-1.c
@@ -0,0 +1,40 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+extern void abort(void);
+
+struct s { int i : 31; };
+
+#define ELT0 {0}
+#define ELT1 {1}
+#define ELT2 {2}
+#define ELT3 {3}
+#define N 32
+#define RES 48
+struct s A[N]
+ = { ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3};
+
+int __attribute__ ((noipa))
+f(struct s *ptr, unsigned n) {
+ int res = 0;
+ for (int i = 0; i < n; ++i)
+ res += ptr[i].i;
+ return res;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ if (f(&A[0], N) != RES)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-2.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-2.c
new file mode 100644
index 0000000000000000000000000000000000000000..1a4a1579c1478b9407ad21b19e8fbdca9f674b42
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-2.c
@@ -0,0 +1,43 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+extern void abort(void);
+
+struct s {
+ unsigned i : 31;
+ char a : 4;
+};
+
+#define N 32
+#define ELT0 {0x7FFFFFFFUL, 0}
+#define ELT1 {0x7FFFFFFFUL, 1}
+#define ELT2 {0x7FFFFFFFUL, 2}
+#define ELT3 {0x7FFFFFFFUL, 3}
+#define RES 48
+struct s A[N]
+ = { ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3};
+
+int __attribute__ ((noipa))
+f(struct s *ptr, unsigned n) {
+ int res = 0;
+ for (int i = 0; i < n; ++i)
+ res += ptr[i].a;
+ return res;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ if (f(&A[0], N) != RES)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-3.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-3.c
new file mode 100644
index 0000000000000000000000000000000000000000..216611a29fd8bbfbafdbdb79d790e520f44ba672
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-3.c
@@ -0,0 +1,43 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+#include <stdbool.h>
+
+extern void abort(void);
+
+typedef struct {
+ int c;
+ int b;
+ bool a : 1;
+} struct_t;
+
+#define N 16
+#define ELT_F { 0xFFFFFFFF, 0xFFFFFFFF, 0 }
+#define ELT_T { 0xFFFFFFFF, 0xFFFFFFFF, 1 }
+
+struct_t vect_false[N] = { ELT_F, ELT_F, ELT_F, ELT_F, ELT_F, ELT_F, ELT_F, ELT_F,
+			   ELT_F, ELT_F, ELT_F, ELT_F, ELT_F, ELT_F, ELT_F, ELT_F };
+struct_t vect_true[N] = { ELT_F, ELT_F, ELT_T, ELT_F, ELT_F, ELT_F, ELT_F, ELT_F,
+			  ELT_F, ELT_F, ELT_T, ELT_F, ELT_F, ELT_F, ELT_F, ELT_F };
+int main (void)
+{
+ unsigned ret = 0;
+ for (unsigned i = 0; i < N; i++)
+ {
+ ret |= vect_false[i].a;
+ }
+ if (ret)
+ abort ();
+
+ for (unsigned i = 0; i < N; i++)
+ {
+ ret |= vect_true[i].a;
+ }
+ if (!ret)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-4.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-4.c
new file mode 100644
index 0000000000000000000000000000000000000000..5bc9c412e9616aefcbf49a4518f1603380a54b2f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-4.c
@@ -0,0 +1,45 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+extern void abort(void);
+
+struct s {
+ unsigned i : 31;
+ char x : 2;
+ char a : 4;
+};
+
+#define N 32
+#define ELT0 {0x7FFFFFFFUL, 3, 0}
+#define ELT1 {0x7FFFFFFFUL, 3, 1}
+#define ELT2 {0x7FFFFFFFUL, 3, 2}
+#define ELT3 {0x7FFFFFFFUL, 3, 3}
+#define RES 48
+struct s A[N]
+ = { ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3};
+
+int __attribute__ ((noipa))
+f(struct s *ptr, unsigned n) {
+ int res = 0;
+ for (int i = 0; i < n; ++i)
+ res += ptr[i].a;
+ return res;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ if (f(&A[0], N) != RES)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-5.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-5.c
new file mode 100644
index 0000000000000000000000000000000000000000..1dc24d3eded192144dc9ad94589b4c5c3d999e65
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-5.c
@@ -0,0 +1,42 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+extern void abort(void);
+
+struct s {
+ unsigned a : 23; unsigned b : 9;
+};
+
+#define N 32
+#define ELT0 {0x7FFFFFUL, 0}
+#define ELT1 {0x7FFFFFUL, 1}
+#define ELT2 {0x7FFFFFUL, 2}
+#define ELT3 {0x7FFFFFUL, 3}
+#define RES 48
+struct s A[N]
+ = { ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3};
+
+int __attribute__ ((noipa))
+f(struct s *ptr, unsigned n) {
+ int res = 0;
+ for (int i = 0; i < n; ++i)
+ res += ptr[i].b;
+ return res;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ if (f(&A[0], N) != RES)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-6.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-6.c
new file mode 100644
index 0000000000000000000000000000000000000000..7d24c29975865883a7cdc7aa057fbb6bf413e0bc
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-6.c
@@ -0,0 +1,42 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+extern void abort(void);
+
+struct s {
+ unsigned a : 23; unsigned b : 8;
+};
+
+#define N 32
+#define ELT0 {0x7FFFFFUL, 0}
+#define ELT1 {0x7FFFFFUL, 1}
+#define ELT2 {0x7FFFFFUL, 2}
+#define ELT3 {0x7FFFFFUL, 3}
+#define RES 48
+struct s A[N]
+ = { ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+ ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3};
+
+int __attribute__ ((noipa))
+f(struct s *ptr, unsigned n) {
+ int res = 0;
+ for (int i = 0; i < n; ++i)
+ res += ptr[i].b;
+ return res;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ if (f(&A[0], N) != RES)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-1.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-1.c
new file mode 100644
index 0000000000000000000000000000000000000000..19683d277b1ade1034496136f1d03bb2b446900f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-1.c
@@ -0,0 +1,39 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+extern void abort(void);
+
+struct s { int i : 31; };
+
+#define N 32
+#define V 5
+struct s A[N];
+
+void __attribute__ ((noipa))
+f(struct s *ptr, unsigned n) {
+ for (int i = 0; i < n; ++i)
+ ptr[i].i = V;
+}
+
+void __attribute__ ((noipa))
+check_f(struct s *ptr) {
+ for (unsigned i = 0; i < N; ++i)
+ if (ptr[i].i != V)
+ abort ();
+}
+
+int main (void)
+{
+ check_vect ();
+ __builtin_memset (&A[0], 0, sizeof(struct s) * N);
+
+ f(&A[0], N);
+ check_f (&A[0]);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-2.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-2.c
new file mode 100644
index 0000000000000000000000000000000000000000..d550dd35ab75eb67f6e53f89fbf55b7315e50bc9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-2.c
@@ -0,0 +1,42 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+extern void abort(void);
+
+struct s {
+ unsigned i : 31;
+ char a : 4;
+};
+
+#define N 32
+#define V 5
+struct s A[N];
+
+void __attribute__ ((noipa))
+f(struct s *ptr, unsigned n) {
+ for (int i = 0; i < n; ++i)
+ ptr[i].a = V;
+}
+
+void __attribute__ ((noipa))
+check_f(struct s *ptr) {
+ for (unsigned i = 0; i < N; ++i)
+ if (ptr[i].a != V)
+ abort ();
+}
+
+int main (void)
+{
+ check_vect ();
+ __builtin_memset (&A[0], 0, sizeof(struct s) * N);
+
+ f(&A[0], N);
+ check_f (&A[0]);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-3.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-3.c
new file mode 100644
index 0000000000000000000000000000000000000000..3303d2610ff972d986be172962c129634ee64254
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-3.c
@@ -0,0 +1,43 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+extern void abort(void);
+
+struct s {
+ unsigned i : 31;
+ char x : 2;
+ char a : 4;
+};
+
+#define N 32
+#define V 5
+struct s A[N];
+
+void __attribute__ ((noipa))
+f(struct s *ptr, unsigned n) {
+ for (int i = 0; i < n; ++i)
+ ptr[i].a = V;
+}
+
+void __attribute__ ((noipa))
+check_f(struct s *ptr) {
+ for (unsigned i = 0; i < N; ++i)
+ if (ptr[i].a != V)
+ abort ();
+}
+
+int main (void)
+{
+ check_vect ();
+ __builtin_memset (&A[0], 0, sizeof(struct s) * N);
+
+ f(&A[0], N);
+ check_f (&A[0]);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-4.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-4.c
new file mode 100644
index 0000000000000000000000000000000000000000..fae6ea3557dcaba7b330ebdaa471281d33d2ba15
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-4.c
@@ -0,0 +1,42 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+extern void abort(void);
+
+struct s {
+ unsigned b : 23;
+ unsigned a : 9;
+};
+
+#define N 32
+#define V 5
+struct s A[N];
+
+void __attribute__ ((noipa))
+f(struct s *ptr, unsigned n) {
+ for (int i = 0; i < n; ++i)
+ ptr[i].a = V;
+}
+
+void __attribute__ ((noipa))
+check_f(struct s *ptr) {
+ for (unsigned i = 0; i < N; ++i)
+ if (ptr[i].a != V)
+ abort ();
+}
+
+int main (void)
+{
+ check_vect ();
+ __builtin_memset (&A[0], 0, sizeof(struct s) * N);
+
+ f(&A[0], N);
+ check_f (&A[0]);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-5.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-5.c
new file mode 100644
index 0000000000000000000000000000000000000000..99360c2967b076212c67eb4f34b8fd91711d8821
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-5.c
@@ -0,0 +1,42 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+extern void abort(void);
+
+struct s {
+ unsigned b : 23;
+ unsigned a : 8;
+};
+
+#define N 32
+#define V 5
+struct s A[N];
+
+void __attribute__ ((noipa))
+f(struct s *ptr, unsigned n) {
+ for (int i = 0; i < n; ++i)
+ ptr[i].a = V;
+}
+
+void __attribute__ ((noipa))
+check_f(struct s *ptr) {
+ for (unsigned i = 0; i < N; ++i)
+ if (ptr[i].a != V)
+ abort ();
+}
+
+int main (void)
+{
+ check_vect ();
+ __builtin_memset (&A[0], 0, sizeof(struct s) * N);
+
+ f(&A[0], N);
+ check_f (&A[0]);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+
diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
index 1c8e1a45234b8c3565edaacd55abbee23d8ea240..d13b2fa6661d56e911bb9ec37cd3a9885fa653bb 100644
--- a/gcc/tree-if-conv.cc
+++ b/gcc/tree-if-conv.cc
@@ -91,6 +91,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree-pass.h"
#include "ssa.h"
#include "expmed.h"
+#include "expr.h"
#include "optabs-query.h"
#include "gimple-pretty-print.h"
#include "alias.h"
@@ -123,6 +124,9 @@ along with GCC; see the file COPYING3. If not see
#include "tree-vectorizer.h"
#include "tree-eh.h"
+/* For lang_hooks.types.type_for_mode. */
+#include "langhooks.h"
+
/* Only handle PHIs with no more arguments unless we are asked to by
simd pragma. */
#define MAX_PHI_ARG_NUM \
@@ -145,6 +149,12 @@ static bool need_to_rewrite_undefined;
before phi_convertible_by_degenerating_args. */
static bool any_complicated_phi;
+/* True if we have bitfield accesses we can lower. */
+static bool need_to_lower_bitfields;
+
+/* True if there is any ifcvting to be done. */
+static bool need_to_ifcvt;
+
/* Hash for struct innermost_loop_behavior. It depends on the user to
free the memory. */
@@ -1411,15 +1421,6 @@ if_convertible_loop_p_1 (class loop *loop, vec<data_reference_p> *refs)
calculate_dominance_info (CDI_DOMINATORS);
- /* Allow statements that can be handled during if-conversion. */
- ifc_bbs = get_loop_body_in_if_conv_order (loop);
- if (!ifc_bbs)
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "Irreducible loop\n");
- return false;
- }
-
for (i = 0; i < loop->num_nodes; i++)
{
basic_block bb = ifc_bbs[i];
@@ -2898,18 +2899,22 @@ version_loop_for_if_conversion (class loop *loop, vec<gimple *> *preds)
class loop *new_loop;
gimple *g;
gimple_stmt_iterator gsi;
- unsigned int save_length;
+ unsigned int save_length = 0;
g = gimple_build_call_internal (IFN_LOOP_VECTORIZED, 2,
build_int_cst (integer_type_node, loop->num),
integer_zero_node);
gimple_call_set_lhs (g, cond);
- /* Save BB->aux around loop_version as that uses the same field. */
- save_length = loop->inner ? loop->inner->num_nodes : loop->num_nodes;
- void **saved_preds = XALLOCAVEC (void *, save_length);
- for (unsigned i = 0; i < save_length; i++)
- saved_preds[i] = ifc_bbs[i]->aux;
+ void **saved_preds = NULL;
+ if (any_complicated_phi || need_to_predicate)
+ {
+ /* Save BB->aux around loop_version as that uses the same field. */
+ save_length = loop->inner ? loop->inner->num_nodes : loop->num_nodes;
+ saved_preds = XALLOCAVEC (void *, save_length);
+ for (unsigned i = 0; i < save_length; i++)
+ saved_preds[i] = ifc_bbs[i]->aux;
+ }
initialize_original_copy_tables ();
/* At this point we invalidate porfile confistency until IFN_LOOP_VECTORIZED
@@ -2921,8 +2926,9 @@ version_loop_for_if_conversion (class loop *loop, vec<gimple *> *preds)
profile_probability::always (), true);
free_original_copy_tables ();
- for (unsigned i = 0; i < save_length; i++)
- ifc_bbs[i]->aux = saved_preds[i];
+ if (any_complicated_phi || need_to_predicate)
+ for (unsigned i = 0; i < save_length; i++)
+ ifc_bbs[i]->aux = saved_preds[i];
if (new_loop == NULL)
return NULL;
@@ -2998,7 +3004,7 @@ ifcvt_split_critical_edges (class loop *loop, bool aggressive_if_conv)
auto_vec<edge> critical_edges;
/* Loop is not well formed. */
- if (num <= 2 || loop->inner || !single_exit (loop))
+ if (loop->inner)
return false;
body = get_loop_body (loop);
@@ -3259,6 +3265,201 @@ ifcvt_hoist_invariants (class loop *loop, edge pe)
free (body);
}
+/* Returns the representative decl (DECL_BIT_FIELD_REPRESENTATIVE) of the
+ bitfield access in STMT iff the representative's type mode is not BLKmode.
+ If BITPOS is not NULL it will hold the tree representing the offset, in
+ bits, of the bitfield relative to the representative. STRUCT_EXPR, if not
+ NULL, will hold the tree representing the base struct of this bitfield. */
+
+static tree
+get_bitfield_rep (gassign *stmt, bool write, tree *bitpos,
+ tree *struct_expr)
+{
+ tree comp_ref = write ? gimple_assign_lhs (stmt)
+ : gimple_assign_rhs1 (stmt);
+
+ tree field_decl = TREE_OPERAND (comp_ref, 1);
+ tree rep_decl = DECL_BIT_FIELD_REPRESENTATIVE (field_decl);
+
+ /* Bail out if the representative is BLKmode as we will not be able to
+ vectorize this. */
+ if (TYPE_MODE (TREE_TYPE (rep_decl)) == E_BLKmode)
+ return NULL_TREE;
+
+ /* Bail out if the DECL_SIZE of the field_decl isn't the same as the BF's
+ precision. */
+ unsigned HOST_WIDE_INT bf_prec
+ = TYPE_PRECISION (TREE_TYPE (gimple_assign_lhs (stmt)));
+ if (compare_tree_int (DECL_SIZE (field_decl), bf_prec) != 0)
+ return NULL_TREE;
+
+ if (struct_expr)
+ *struct_expr = TREE_OPERAND (comp_ref, 0);
+
+ if (bitpos)
+ *bitpos
+ = fold_build2 (MINUS_EXPR, bitsizetype,
+ DECL_FIELD_BIT_OFFSET (field_decl),
+ DECL_FIELD_BIT_OFFSET (rep_decl));
+
+ return rep_decl;
+
+}
+
+/* Lowers the bitfield access in STMT; WRITE is true when it is a write.
+ For a write like:
+
+ struct.bf = _1;
+
+ lower to:
+
+ __ifc_1 = struct.<representative>;
+ __ifc_2 = BIT_INSERT_EXPR (__ifc_1, _1, bitpos);
+ struct.<representative> = __ifc_2;
+
+ For a read:
+
+ _1 = struct.bf;
+
+ lower to:
+
+ __ifc_1 = struct.<representative>;
+ _1 = BIT_FIELD_REF (__ifc_1, bitsize, bitpos);
+
+ where representative is a legal load that contains the bitfield value,
+ bitsize is the size of the bitfield and bitpos the offset to the start of
+ the bitfield within the representative. */
+
+static void
+lower_bitfield (gassign *stmt, bool write)
+{
+ tree struct_expr;
+ tree bitpos;
+ tree rep_decl = get_bitfield_rep (stmt, write, &bitpos, &struct_expr);
+ tree rep_type = TREE_TYPE (rep_decl);
+ tree bf_type = TREE_TYPE (gimple_assign_lhs (stmt));
+
+ gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "Lowering:\n");
+ print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
+ fprintf (dump_file, "to:\n");
+ }
+
+ /* REP_COMP_REF is a COMPONENT_REF for the representative. NEW_VAL is its
+ defining SSA_NAME. */
+ tree rep_comp_ref = build3 (COMPONENT_REF, rep_type, struct_expr, rep_decl,
+ NULL_TREE);
+ tree new_val = ifc_temp_var (rep_type, rep_comp_ref, &gsi);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (new_val), 0, TDF_SLIM);
+
+ if (write)
+ {
+ new_val = ifc_temp_var (rep_type,
+ build3 (BIT_INSERT_EXPR, rep_type, new_val,
+ unshare_expr (gimple_assign_rhs1 (stmt)),
+ bitpos), &gsi);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (new_val), 0, TDF_SLIM);
+
+ gimple *new_stmt = gimple_build_assign (unshare_expr (rep_comp_ref),
+ new_val);
+ gimple_move_vops (new_stmt, stmt);
+ gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
+ }
+ else
+ {
+ tree bfr = build3 (BIT_FIELD_REF, bf_type, new_val,
+ build_int_cst (bitsizetype, TYPE_PRECISION (bf_type)),
+ bitpos);
+ new_val = ifc_temp_var (bf_type, bfr, &gsi);
+
+ gimple *new_stmt = gimple_build_assign (gimple_assign_lhs (stmt),
+ new_val);
+ gimple_move_vops (new_stmt, stmt);
+ gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
+ }
+
+ gsi_remove (&gsi, true);
+}
+
+/* Return TRUE if there are bitfields to lower in this LOOP. Fill
+ READS_TO_LOWER and WRITES_TO_LOWER with the bitfield accesses found. */
+
+static bool
+bitfields_to_lower_p (class loop *loop,
+ vec <gassign *> &reads_to_lower,
+ vec <gassign *> &writes_to_lower)
+{
+ gimple_stmt_iterator gsi;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "Analyzing loop %d for bitfields:\n", loop->num);
+ }
+
+ for (unsigned i = 0; i < loop->num_nodes; ++i)
+ {
+ basic_block bb = ifc_bbs[i];
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gassign *stmt = dyn_cast<gassign*> (gsi_stmt (gsi));
+ if (!stmt)
+ continue;
+
+ tree op = gimple_assign_lhs (stmt);
+ bool write = TREE_CODE (op) == COMPONENT_REF;
+
+ if (!write)
+ op = gimple_assign_rhs1 (stmt);
+
+ if (TREE_CODE (op) != COMPONENT_REF)
+ continue;
+
+ if (DECL_BIT_FIELD_TYPE (TREE_OPERAND (op, 1)))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
+
+ if (!INTEGRAL_TYPE_P (TREE_TYPE (op)))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "\t Bitfield NO OK to lower,"
+ " field type is not Integral.\n");
+ return false;
+ }
+
+ if (!get_bitfield_rep (stmt, write, NULL, NULL))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "\t Bitfield NOT OK to lower,"
+ " representative is BLKmode.\n");
+ return false;
+ }
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "\tBitfield OK to lower.\n");
+ if (write)
+ writes_to_lower.safe_push (stmt);
+ else
+ reads_to_lower.safe_push (stmt);
+ }
+ }
+ }
+ return !reads_to_lower.is_empty () || !writes_to_lower.is_empty ();
+}
+
+
/* If-convert LOOP when it is legal. For the moment this pass has no
profitability analysis. Returns non-zero todo flags when something
changed. */
@@ -3269,12 +3470,16 @@ tree_if_conversion (class loop *loop, vec<gimple *> *preds)
unsigned int todo = 0;
bool aggressive_if_conv;
class loop *rloop;
+ auto_vec <gassign *, 4> reads_to_lower;
+ auto_vec <gassign *, 4> writes_to_lower;
bitmap exit_bbs;
edge pe;
again:
rloop = NULL;
ifc_bbs = NULL;
+ need_to_lower_bitfields = false;
+ need_to_ifcvt = false;
need_to_predicate = false;
need_to_rewrite_undefined = false;
any_complicated_phi = false;
@@ -3290,16 +3495,42 @@ tree_if_conversion (class loop *loop, vec<gimple *> *preds)
aggressive_if_conv = true;
}
- if (!ifcvt_split_critical_edges (loop, aggressive_if_conv))
+ if (!single_exit (loop))
goto cleanup;
- if (!if_convertible_loop_p (loop)
- || !dbg_cnt (if_conversion_tree))
+ /* If there are more than two BBs in the loop then there is at least one if
+ to convert. */
+ if (loop->num_nodes > 2
+ && !ifcvt_split_critical_edges (loop, aggressive_if_conv))
goto cleanup;
- if ((need_to_predicate || any_complicated_phi)
- && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
- || loop->dont_vectorize))
+ ifc_bbs = get_loop_body_in_if_conv_order (loop);
+ if (!ifc_bbs)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Irreducible loop\n");
+ goto cleanup;
+ }
+
+ if (loop->num_nodes > 2)
+ {
+ need_to_ifcvt = true;
+
+ if (!if_convertible_loop_p (loop) || !dbg_cnt (if_conversion_tree))
+ goto cleanup;
+
+ if ((need_to_predicate || any_complicated_phi)
+ && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
+ || loop->dont_vectorize))
+ goto cleanup;
+ }
+
+ if ((flag_tree_loop_vectorize || loop->force_vectorize)
+ && !loop->dont_vectorize)
+ need_to_lower_bitfields = bitfields_to_lower_p (loop, reads_to_lower,
+ writes_to_lower);
+
+ if (!need_to_ifcvt && !need_to_lower_bitfields)
goto cleanup;
/* The edge to insert invariant stmts on. */
@@ -3310,7 +3541,8 @@ tree_if_conversion (class loop *loop, vec<gimple *> *preds)
Either version this loop, or if the pattern is right for outer-loop
vectorization, version the outer loop. In the latter case we will
still if-convert the original inner loop. */
- if (need_to_predicate
+ if (need_to_lower_bitfields
+ || need_to_predicate
|| any_complicated_phi
|| flag_tree_loop_if_convert != 1)
{
@@ -3350,10 +3582,31 @@ tree_if_conversion (class loop *loop, vec<gimple *> *preds)
pe = single_pred_edge (gimple_bb (preds->last ()));
}
- /* Now all statements are if-convertible. Combine all the basic
- blocks into one huge basic block doing the if-conversion
- on-the-fly. */
- combine_blocks (loop);
+ if (need_to_lower_bitfields)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "-------------------------\n");
+ fprintf (dump_file, "Start lowering bitfields\n");
+ }
+ while (!reads_to_lower.is_empty ())
+ lower_bitfield (reads_to_lower.pop (), false);
+ while (!writes_to_lower.is_empty ())
+ lower_bitfield (writes_to_lower.pop (), true);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "Done lowering bitfields\n");
+ fprintf (dump_file, "-------------------------\n");
+ }
+ }
+ if (need_to_ifcvt)
+ {
+ /* Now all statements are if-convertible. Combine all the basic
+ blocks into one huge basic block doing the if-conversion
+ on-the-fly. */
+ combine_blocks (loop);
+ }
/* Perform local CSE, this esp. helps the vectorizer analysis if loads
and stores are involved. CSE only the loop body, not the entry
@@ -3393,6 +3646,8 @@ tree_if_conversion (class loop *loop, vec<gimple *> *preds)
if (rloop != NULL)
{
loop = rloop;
+ reads_to_lower.truncate (0);
+ writes_to_lower.truncate (0);
goto again;
}
diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index b279a82551eb70379804d405983ae5dc44b66bf5..e93cdc727da4bb7863b2ad13f29f7d550492adea 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -4301,7 +4301,8 @@ vect_find_stmt_data_reference (loop_p loop, gimple *stmt,
free_data_ref (dr);
return opt_result::failure_at (stmt,
"not vectorized:"
- " statement is bitfield access %G", stmt);
+ " statement is an unsupported"
+ " bitfield access %G", stmt);
}
if (DR_BASE_ADDRESS (dr)
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index dfbfb71b3c69a0205ccc1b287cb50fa02a70942e..9042599f04399eca37fe9038d2bd5c9f78e3a9e4 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -35,6 +35,8 @@ along with GCC; see the file COPYING3. If not see
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
+#include "gimple-fold.h"
+#include "gimplify-me.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "dumpfile.h"
@@ -663,7 +665,7 @@ vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
static tree
-vect_recog_temp_ssa_var (tree type, gimple *stmt)
+vect_recog_temp_ssa_var (tree type, gimple *stmt = NULL)
{
return make_temp_ssa_name (type, stmt, "patt");
}
@@ -1828,6 +1830,329 @@ vect_recog_widen_sum_pattern (vec_info *vinfo,
return pattern_stmt;
}
+/* Function vect_recog_bitfield_ref_pattern
+
+ Try to find the following pattern:
+
+ bf_value = BIT_FIELD_REF (container, bitsize, bitpos);
+ result = (type_out) bf_value;
+
+ where type_out is a non-bitfield type, that is to say, its precision matches
+ 2^(TYPE_SIZE(type_out) - (TYPE_UNSIGNED (type_out) ? 1 : 2)).
+
+ Input:
+
+ * STMT_VINFO: The stmt from which the pattern search begins.
+ here it starts with:
+ result = (type_out) bf_value;
+
+ Output:
+
+ * TYPE_OUT: The vector type of the output of this pattern.
+
+ * Return value: A new stmt that will be used to replace the sequence of
+ stmts that constitute the pattern. If the precision of type_out is bigger
+ than the precision type of _1 we perform the widening before the shifting,
+ since the new precision will be large enough to shift the value and moving
+ widening operations up the statement chain enables the generation of
+ widening loads. If we are widening and the operation after the pattern is
+ an addition then we mask first and shift later, to enable the generation of
+ shifting adds. In the case of narrowing we will always mask first, shift
+ last and then perform a narrowing operation. This will enable the
+ generation of narrowing shifts.
+
+ Widening with mask first, shift later:
+ container = (type_out) container;
+ masked = container & (((1 << bitsize) - 1) << bitpos);
+ result = masked >> bitpos;
+
+ Widening with shift first, mask last:
+ container = (type_out) container;
+ shifted = container >> bitpos;
+ result = shifted & ((1 << bitsize) - 1);
+
+ Narrowing:
+ masked = container & (((1 << bitsize) - 1) << bitpos);
+ result = masked >> bitpos;
+ result = (type_out) result;
+
+ The shifting is always optional depending on whether bitpos != 0.
+
+*/
+
+static gimple *
+vect_recog_bitfield_ref_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
+ tree *type_out)
+{
+ gassign *first_stmt = dyn_cast <gassign *> (stmt_info->stmt);
+
+ if (!first_stmt)
+ return NULL;
+
+ gassign *bf_stmt;
+ if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (first_stmt))
+ && TREE_CODE (gimple_assign_rhs1 (first_stmt)) == SSA_NAME)
+ {
+ gimple *second_stmt
+ = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (first_stmt));
+ bf_stmt = dyn_cast <gassign *> (second_stmt);
+ if (!bf_stmt
+ || gimple_assign_rhs_code (bf_stmt) != BIT_FIELD_REF)
+ return NULL;
+ }
+ else
+ return NULL;
+
+ tree bf_ref = gimple_assign_rhs1 (bf_stmt);
+ tree container = TREE_OPERAND (bf_ref, 0);
+
+ if (!bit_field_offset (bf_ref).is_constant ()
+ || !bit_field_size (bf_ref).is_constant ()
+ || !tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (container))))
+ return NULL;
+
+ if (!INTEGRAL_TYPE_P (TREE_TYPE (bf_ref)))
+ return NULL;
+
+ gimple *use_stmt, *pattern_stmt;
+ use_operand_p use_p;
+ tree ret = gimple_assign_lhs (first_stmt);
+ tree ret_type = TREE_TYPE (ret);
+ bool shift_first = true;
+ tree vectype;
+
+ /* If the first operand of the BIT_FIELD_REF is not an INTEGER type, convert
+ it to one of the same width so we can perform the necessary masking and
+ shifting. */
+ if (!INTEGRAL_TYPE_P (TREE_TYPE (container)))
+ {
+ unsigned HOST_WIDE_INT container_size =
+ tree_to_uhwi (TYPE_SIZE (TREE_TYPE (container)));
+ tree int_type = build_nonstandard_integer_type (container_size, true);
+ pattern_stmt
+ = gimple_build_assign (vect_recog_temp_ssa_var (int_type),
+ VIEW_CONVERT_EXPR, container);
+ vectype = get_vectype_for_scalar_type (vinfo, int_type);
+ container = gimple_assign_lhs (pattern_stmt);
+ append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
+ }
+ else
+ vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (container));
+
+ /* We move the conversion earlier if the loaded type is smaller than the
+ return type to enable the use of widening loads. */
+ if (TYPE_PRECISION (TREE_TYPE (container)) < TYPE_PRECISION (ret_type)
+ && !useless_type_conversion_p (TREE_TYPE (container), ret_type))
+ {
+ pattern_stmt
+ = gimple_build_assign (vect_recog_temp_ssa_var (ret_type),
+ NOP_EXPR, container);
+ container = gimple_get_lhs (pattern_stmt);
+ append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
+ }
+ else if (!useless_type_conversion_p (TREE_TYPE (container), ret_type))
+ /* If we are doing the conversion last then also delay the shift as we may
+ be able to combine the shift and conversion in certain cases. */
+ shift_first = false;
+
+ tree container_type = TREE_TYPE (container);
+
+ /* If the only use of the result of this BIT_FIELD_REF + CONVERT is a
+ PLUS_EXPR then do the shift last as some targets can combine the shift and
+ add into a single instruction. */
+ if (single_imm_use (gimple_assign_lhs (first_stmt), &use_p, &use_stmt))
+ {
+ if (gimple_code (use_stmt) == GIMPLE_ASSIGN
+ && gimple_assign_rhs_code (use_stmt) == PLUS_EXPR)
+ shift_first = false;
+ }
+
+ unsigned HOST_WIDE_INT shift_n = bit_field_offset (bf_ref).to_constant ();
+ unsigned HOST_WIDE_INT mask_width = bit_field_size (bf_ref).to_constant ();
+ unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
+ if (BYTES_BIG_ENDIAN)
+ shift_n = prec - shift_n - mask_width;
+
+ /* If we don't have to shift we only generate the mask, so just fix the
+ code-path to shift_first. */
+ if (shift_n == 0)
+ shift_first = true;
+
+ tree result;
+ if (shift_first)
+ {
+ tree shifted = container;
+ if (shift_n)
+ {
+ pattern_stmt
+ = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
+ RSHIFT_EXPR, container,
+ build_int_cst (sizetype, shift_n));
+ shifted = gimple_assign_lhs (pattern_stmt);
+ append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
+ }
+
+ tree mask = wide_int_to_tree (container_type,
+ wi::mask (mask_width, false, prec));
+
+ pattern_stmt
+ = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
+ BIT_AND_EXPR, shifted, mask);
+ result = gimple_assign_lhs (pattern_stmt);
+ }
+ else
+ {
+ tree mask = wide_int_to_tree (container_type,
+ wi::shifted_mask (shift_n, mask_width,
+ false, prec));
+ pattern_stmt
+ = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
+ BIT_AND_EXPR, container, mask);
+ tree masked = gimple_assign_lhs (pattern_stmt);
+
+ append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
+ pattern_stmt
+ = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
+ RSHIFT_EXPR, masked,
+ build_int_cst (sizetype, shift_n));
+ result = gimple_assign_lhs (pattern_stmt);
+ }
+
+ if (!useless_type_conversion_p (TREE_TYPE (result), ret_type))
+ {
+ append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype);
+ pattern_stmt
+ = gimple_build_assign (vect_recog_temp_ssa_var (ret_type),
+ NOP_EXPR, result);
+ }
+
+ *type_out = STMT_VINFO_VECTYPE (stmt_info);
+ vect_pattern_detected ("bitfield_ref pattern", stmt_info->stmt);
+
+ return pattern_stmt;
+}
+
+/* Function vect_recog_bit_insert_pattern
+
+ Try to find the following pattern:
+
+ written = BIT_INSERT_EXPR (container, value, bitpos);
+
+ Input:
+
+ * STMT_VINFO: The stmt we want to replace.
+
+ Output:
+
+ * TYPE_OUT: The vector type of the output of this pattern.
+
+ * Return value: A new stmt that will be used to replace the sequence of
+ stmts that constitute the pattern. In this case it will be:
+ value = (container_type) value; // Make sure
+ shifted = value << bitpos; // Shift value into place
+ masked = shifted & (mask << bitpos); // Mask off the non-relevant bits in
+ // the 'to-write value'.
+ cleared = container & ~(mask << bitpos); // Clearing the bits we want to
+ // write to from the value we want
+ // to write to.
+ written = cleared | masked; // Write bits.
+
+
+ where mask = ((1 << TYPE_PRECISION (value)) - 1), a mask to keep the number of
+ bits corresponding to the real size of the bitfield value we are writing to.
+ The shifting is always optional depending on whether bitpos != 0.
+
+*/
+
+static gimple *
+vect_recog_bit_insert_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
+ tree *type_out)
+{
+ gassign *bf_stmt = dyn_cast <gassign *> (stmt_info->stmt);
+ if (!bf_stmt || gimple_assign_rhs_code (bf_stmt) != BIT_INSERT_EXPR)
+ return NULL;
+
+ tree container = gimple_assign_rhs1 (bf_stmt);
+ tree value = gimple_assign_rhs2 (bf_stmt);
+ tree shift = gimple_assign_rhs3 (bf_stmt);
+
+ tree bf_type = TREE_TYPE (value);
+ tree container_type = TREE_TYPE (container);
+
+ if (!INTEGRAL_TYPE_P (container_type)
+ || !tree_fits_uhwi_p (TYPE_SIZE (container_type)))
+ return NULL;
+
+ gimple *pattern_stmt;
+
+ vect_unpromoted_value unprom;
+ unprom.set_op (value, vect_internal_def);
+ value = vect_convert_input (vinfo, stmt_info, container_type, &unprom,
+ get_vectype_for_scalar_type (vinfo,
+ container_type));
+
+ unsigned HOST_WIDE_INT mask_width = TYPE_PRECISION (bf_type);
+ unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
+ unsigned HOST_WIDE_INT shift_n = tree_to_uhwi (shift);
+ if (BYTES_BIG_ENDIAN)
+ {
+ shift_n = prec - shift_n - mask_width;
+ shift = build_int_cst (TREE_TYPE (shift), shift_n);
+ }
+
+ if (!useless_type_conversion_p (TREE_TYPE (value), container_type))
+ {
+ pattern_stmt =
+ gimple_build_assign (vect_recog_temp_ssa_var (container_type),
+ NOP_EXPR, value);
+ append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
+ value = gimple_get_lhs (pattern_stmt);
+ }
+
+ /* Shift VALUE into place. */
+ tree shifted = value;
+ if (shift_n)
+ {
+ pattern_stmt
+ = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
+ LSHIFT_EXPR, value, shift);
+ append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
+ shifted = gimple_get_lhs (pattern_stmt);
+ }
+
+ tree mask_t
+ = wide_int_to_tree (container_type,
+ wi::shifted_mask (shift_n, mask_width, false, prec));
+
+ /* Clear bits we don't want to write back from SHIFTED. */
+ gimple_seq stmts = NULL;
+ tree masked = gimple_build (&stmts, BIT_AND_EXPR, container_type, shifted,
+ mask_t);
+ if (!gimple_seq_empty_p (stmts))
+ {
+ pattern_stmt = gimple_seq_first_stmt (stmts);
+ append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
+ }
+
+ /* Mask off the bits in the container that we are to write to. */
+ mask_t = wide_int_to_tree (container_type,
+ wi::shifted_mask (shift_n, mask_width, true, prec));
+ tree cleared = vect_recog_temp_ssa_var (container_type);
+ pattern_stmt = gimple_build_assign (cleared, BIT_AND_EXPR, container, mask_t);
+ append_pattern_def_seq (vinfo, stmt_info, pattern_stmt);
+
+ /* Write MASKED into CLEARED. */
+ pattern_stmt
+ = gimple_build_assign (vect_recog_temp_ssa_var (container_type),
+ BIT_IOR_EXPR, cleared, masked);
+
+ *type_out = STMT_VINFO_VECTYPE (stmt_info);
+ vect_pattern_detected ("bit_insert pattern", stmt_info->stmt);
+
+ return pattern_stmt;
+}
+
+
/* Recognize cases in which an operation is performed in one type WTYPE
but could be done more efficiently in a narrower type NTYPE. For example,
if we have:
@@ -5623,6 +5948,8 @@ struct vect_recog_func
taken which means usually the more complex one needs to preceed the
less comples onex (widen_sum only after dot_prod or sad for example). */
static vect_recog_func vect_vect_recog_func_ptrs[] = {
+ { vect_recog_bitfield_ref_pattern, "bitfield_ref" },
+ { vect_recog_bit_insert_pattern, "bit_insert" },
{ vect_recog_over_widening_pattern, "over_widening" },
/* Must come after over_widening, which narrows the shift as much as
possible beforehand. */
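For completeness, the write path recognised by vect_recog_bit_insert_pattern
amounts to the usual read-modify-write sequence; a scalar sketch under the
same assumptions as the read example earlier (little-endian layout, bitfield
narrower than its 32-bit container, illustrative only):

    static inline unsigned
    write_bitfield (unsigned container, unsigned value,
                    unsigned bitpos, unsigned bitsize)
    {
      unsigned mask    = ((1u << bitsize) - 1) << bitpos;
      unsigned shifted = value << bitpos;     /* shift value into place */
      unsigned masked  = shifted & mask;      /* drop bits outside the field */
      unsigned cleared = container & ~mask;   /* clear the destination bits */
      return cleared | masked;                /* write bits */
    }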