New in V2:
----------
- The approach of the patch is largely the same as in v1, with the
key functional difference that v1 had categorized
`alignment_support_scheme' as `dr_unaligned_unsupported'.  That
category denotes memory accesses which are known to be unaligned at
compile time, on an architecture where vectorized unaligned accesses
are unsupported.  For accesses whose alignment may not be known at
compile time but for which alignment is required at runtime, the
correct category is `dr_aligned', which v2 uses instead.
- Finally, a simplification is made.  Before, we had the check:

    if ((vf.is_constant () && pow2p_hwi (new_alignment.to_constant ()))
        || (!vf.is_constant () && pow2p_hwi (align_factor_c)))

Given that we have `new_alignment = vf * align_factor_c', I do not
believe anything can be derived from `new_alignment' that is not
already known from `align_factor_c'.  That is, a power of 2 has only
one prime factor, 2, and multiplication never removes prime factors;
if `align_factor_c' is not itself a power of 2, multiplying it by
`vf' cannot correct that, so the check on `new_alignment' seems
redundant.  The predicate can thus be simplified to
`if (pow2p_hwi (align_factor_c))'.  This seems sound to me, should my
logic be correct; a brute-force sanity check is sketched below.
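As a standalone sanity check of that reasoning (plain C, not GCC
code; `pow2p' here mirrors the behavior of GCC's `pow2p_hwi' for
nonzero values):

    #include <assert.h>
    #include <stdint.h>

    /* A positive value is a power of 2 iff it has a single bit set.  */
    static int
    pow2p (uint64_t x)
    {
      return x != 0 && (x & (x - 1)) == 0;
    }

    int
    main (void)
    {
      /* Divisors of a power of 2 are themselves powers of 2, so
         pow2p (vf * a) can only hold when pow2p (a) does.  Verify
         the implication by brute force over small operands.  */
      for (uint64_t vf = 1; vf <= 256; vf++)
        for (uint64_t a = 1; a <= 256; a++)
          if (pow2p (vf * a))
            assert (pow2p (a));
      return 0;
    }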
-------------------
When vectorizing non-contiguous memory accesses, such as loads from a
particular struct member, and specifically when vectorizing with
unknown bounds (thus through a pointer rather than over an array),
inadequate alignment checking allows a single vectorized loop
iteration to cross a page boundary.  This can lead to segmentation
faults in the resulting binaries.
For example, for the given datatype:
typedef struct {
uint64_t a;
uint64_t b;
uint32_t flag;
uint32_t pad;
} Data;
and a loop such as:
int
foo (Data *ptr) {
if (ptr == NULL)
return -1;
int cnt;
for (cnt = 0; cnt < MAX; cnt++) {
if (ptr->flag == 0)
break;
ptr++;
}
return cnt;
}
the vectorizer yields the following CFG on armhf:
<bb 1>:
_41 = ptr_4(D) + 16;
<bb 2>:
_44 = MEM[(unsigned int *)ivtmp_42];
ivtmp_45 = ivtmp_42 + 24;
_46 = MEM[(unsigned int *)ivtmp_45];
ivtmp_47 = ivtmp_45 + 24;
_48 = MEM[(unsigned int *)ivtmp_47];
ivtmp_49 = ivtmp_47 + 24;
_50 = MEM[(unsigned int *)ivtmp_49];
vect_cst__51 = {_44, _46, _48, _50};
mask_patt_6.17_52 = vect_cst__51 == { 0, 0, 0, 0 };
if (mask_patt_6.17_52 != { 0, 0, 0, 0 })
goto <bb 4>;
else
ivtmp_43 = ivtmp_42 + 96;
goto <bb 2>;
<bb 4>:
...
without any alignment check on the starting address and without
ensuring that alignment is preserved across iterations.  We therefore
fix the handling of such cases.
In `vect_compute_data_ref_alignment' we ensure that the vector
alignment requirement is large enough to cover the entire address
range accessed by the group of element-wise loads carried out in a
single vectorized iteration, rounded up to the nearest power of 2.
In `get_load_store_type', we then ensure the correct categorization
of the alignment support scheme for VMAT_ELEMENTWISE access types,
depending on whether they happen within the context of a scalarized
vector access in a loop with unknown bounds where speculative reads
are required.  Where this is the case, we change
`alignment_support_scheme' from `dr_unaligned_supported' to
`dr_aligned', ensuring that proper alignment is required.
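As a rough arithmetic illustration of the resulting alignment
requirement (plain C, not GCC internals; the 24-byte struct size and
the vectorization factor of 4 are taken from the armhf example above):

    #include <stdint.h>
    #include <stdio.h>

    /* Smallest power of 2 greater than or equal to x.  */
    static uint64_t
    next_pow2 (uint64_t x)
    {
      uint64_t p = 1;
      while (p < x)
        p <<= 1;
      return p;
    }

    int
    main (void)
    {
      uint64_t step = 24; /* sizeof (Data) from the example above.  */
      uint64_t vf = 4;    /* four element-wise loads per iteration.  */
      /* One vectorized iteration touches addresses within a window
         of vf * step = 96 bytes.  Requiring the start to be aligned
         to the next power of 2, 128, guarantees the window cannot
         straddle a 128-byte boundary, and hence cannot cross a page.  */
      printf ("window = %llu bytes, required alignment = %llu\n",
              (unsigned long long) (vf * step),
              (unsigned long long) next_pow2 (vf * step));
      return 0;
    }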
gcc/ChangeLog:
PR tree-optimization/124037
* tree-vect-stmts.cc (get_load_store_type): Fix
alignment_support_scheme categorization of VMAT_ELEMENTWISE
loads.
* tree-vect-data-refs.cc (vect_compute_data_ref_alignment):
Round alignments to next power of 2.
gcc/testsuite/ChangeLog:
* gcc.dg/vect/vect-pr124037.c: New test.
* g++.dg/vect/vect-pr124037.cc: New test.
---
gcc/testsuite/g++.dg/vect/vect-pr124037.cc | 38 ++++++++++++++
gcc/testsuite/gcc.dg/vect/vect-pr124037.c | 58 ++++++++++++++++++++++
gcc/tree-vect-data-refs.cc | 19 ++++---
gcc/tree-vect-stmts.cc | 14 +++++-
4 files changed, 117 insertions(+), 12 deletions(-)
create mode 100644 gcc/testsuite/g++.dg/vect/vect-pr124037.cc
create mode 100644 gcc/testsuite/gcc.dg/vect/vect-pr124037.c
diff --git a/gcc/testsuite/g++.dg/vect/vect-pr124037.cc b/gcc/testsuite/g++.dg/vect/vect-pr124037.cc
new file mode 100644
index 00000000000..6e46238c5de
--- /dev/null
+++ b/gcc/testsuite/g++.dg/vect/vect-pr124037.cc
@@ -0,0 +1,38 @@
+/* PR tree-optimization/124037 */
+/* { dg-require-effective-target mmap } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-additional-options "-std=c++11" } */
+
+struct Token
+{
+ unsigned t, t1, t2;
+ void *a, *b;
+ unsigned short Kind;
+ unsigned short flags;
+ constexpr Token() : Token(0){}
+ constexpr Token(int a): t(0), t1(0), t2 (0), a(nullptr),b (nullptr),
+ Kind(a), flags(0) {}
+ bool isNot(int K) const { return Kind != K; }
+};
+
+[[gnu::noipa]]
+unsigned getArgLength(const Token *ArgPtr) {
+ unsigned NumArgTokens = 0;
+ for (; ArgPtr->isNot(0); ++ArgPtr)
+ ++NumArgTokens;
+ return NumArgTokens;
+}
+
+Token tokens[] = {{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},
+ {1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},
+ {1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},
+ {1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},
+ {1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},
+ {1},{1},{1},{1},{1},{1},{}};
+
+int main()
+{
+ __builtin_printf("%d\n", getArgLength(tokens));
+}
+/* { dg-final { scan-tree-dump "alignment increased due to early break" "vect"
} } */
+/* { dg-final { scan-tree-dump "step doesn't divide the vector alignment"
"vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-pr124037.c b/gcc/testsuite/gcc.dg/vect/vect-pr124037.c
new file mode 100644
index 00000000000..c6326bda5bb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-pr124037.c
@@ -0,0 +1,58 @@
+/* PR tree-optimization/124037 */
+/* { dg-require-effective-target mmap } */
+/* { dg-require-effective-target vect_early_break } */
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#define MAX 65536
+
+typedef struct {
+ uint64_t a;
+ uint64_t b;
+ uint32_t flag;
+ uint32_t pad;
+} Data;
+
+int __attribute__ ((noinline))
+foo (Data *ptr) {
+ if (ptr == NULL)
+ return -1;
+
+ int cnt;
+ for (cnt = 0; cnt < MAX; cnt++) {
+ if (ptr->flag == 0)
+ break;
+ ptr++;
+ }
+ return cnt;
+}
+
+int main() {
+ long pgsz = sysconf (_SC_PAGESIZE);
+ if (pgsz == -1) {
+ fprintf (stderr, "sysconf failed\n");
+ return 0;
+ }
+
+ /* Allocate 2 consecutive pages. */
+ void *mem = mmap (NULL, pgsz * 2, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (mem == MAP_FAILED) {
+ fprintf (stderr, "mmap failed\n");
+ return 0;
+ }
+
+ memset (mem, 1, pgsz);
+ uint8_t *ptr = (uint8_t*) mem + pgsz;
+ memset (ptr - 16, 0, 16);
+
+ mprotect (ptr, pgsz, PROT_NONE);
+ Data *data = (Data *) (ptr - 80);
+ __builtin_printf("%d\n", foo(data));
+}
+
+/* { dg-final { scan-tree-dump "alignment increased due to early break" "vect"
} } */
+/* { dg-final { scan-tree-dump "step doesn't divide the vector alignment"
"vect" } } */
diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index e1facc7e957..c6f77bdc4b1 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -1457,21 +1457,20 @@ vect_compute_data_ref_alignment (vec_info *vinfo, dr_vec_info *dr_info,
if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
align_factor_c *= DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info));
+ if (!pow2p_hwi (align_factor_c))
+ align_factor_c = HOST_WIDE_INT_1U << ceil_log2 (align_factor_c);
+