Hi Richard,

Thanks for the review.
On Tue, 28 May 2019 at 20:44, Richard Sandiford <richard.sandif...@arm.com> wrote: > > Kugan Vivekanandarajah <kugan.vivekanandara...@linaro.org> writes: > > [...] > > diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c > > index b3fae5b..c15b8a2 100644 > > --- a/gcc/tree-vect-loop-manip.c > > +++ b/gcc/tree-vect-loop-manip.c > > @@ -415,10 +415,16 @@ vect_set_loop_masks_directly (struct loop *loop, > > loop_vec_info loop_vinfo, > > bool might_wrap_p) > > { > > tree compare_type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo); > > + tree iv_type = LOOP_VINFO_MASK_IV_TYPE (loop_vinfo); > > tree mask_type = rgm->mask_type; > > unsigned int nscalars_per_iter = rgm->max_nscalars_per_iter; > > poly_uint64 nscalars_per_mask = TYPE_VECTOR_SUBPARTS (mask_type); > > + bool convert = false; > > > > + /* If the compare_type is not iv_type, we will create an IV with > > + iv_type with truncated use (i.e. converted to the correct type). */ > > + if (compare_type != iv_type) > > + convert = true; > > /* Calculate the maximum number of scalar values that the rgroup > > handles in total, the number that it handles for each iteration > > of the vector loop, and the number that it should skip during the > > @@ -444,12 +450,43 @@ vect_set_loop_masks_directly (struct loop *loop, > > loop_vec_info loop_vinfo, > > processed. */ > > tree index_before_incr, index_after_incr; > > gimple_stmt_iterator incr_gsi; > > + gimple_stmt_iterator incr_gsi2; > > bool insert_after; > > - tree zero_index = build_int_cst (compare_type, 0); > > + tree zero_index; > > standard_iv_increment_position (loop, &incr_gsi, &insert_after); > > - create_iv (zero_index, nscalars_step, NULL_TREE, loop, &incr_gsi, > > - insert_after, &index_before_incr, &index_after_incr); > > > > + if (convert) > > + { > > + /* If we are creating IV of iv_type and then converting. 
*/ > > + zero_index = build_int_cst (iv_type, 0); > > + tree step = build_int_cst (iv_type, > > + LOOP_VINFO_VECT_FACTOR (loop_vinfo)); > > + /* Creating IV of iv_type. */ > > + create_iv (zero_index, step, NULL_TREE, loop, &incr_gsi, > > + insert_after, &index_before_incr, &index_after_incr); > > + /* Create truncated index_before and after increament. */ > > + tree index_before_incr_trunc = make_ssa_name (compare_type); > > + tree index_after_incr_trunc = make_ssa_name (compare_type); > > + gimple *incr_before_stmt = gimple_build_assign > > (index_before_incr_trunc, > > + NOP_EXPR, > > + index_before_incr); > > + gimple *incr_after_stmt = gimple_build_assign > > (index_after_incr_trunc, > > + NOP_EXPR, > > + index_after_incr); > > + incr_gsi2 = incr_gsi; > > + gsi_insert_before (&incr_gsi2, incr_before_stmt, GSI_NEW_STMT); > > + gsi_insert_after (&incr_gsi, incr_after_stmt, GSI_NEW_STMT); > > + index_before_incr = index_before_incr_trunc; > > + index_after_incr = index_after_incr_trunc; > > + zero_index = build_int_cst (compare_type, 0); > > + } > > + else > > + { > > + /* If the IV is of compare_type, no convertion needed. */ > > + zero_index = build_int_cst (compare_type, 0); > > + create_iv (zero_index, nscalars_step, NULL_TREE, loop, &incr_gsi, > > + insert_after, &index_before_incr, &index_after_incr); > > + } > > tree test_index, test_limit, first_limit; > > gimple_stmt_iterator *test_gsi; > > if (might_wrap_p) > > Now that we have an explicit iv_type, there shouldn't be any need to > treat this as two special cases. I think we should just convert the > IV to the comparison type before passing it to the WHILE. Changed it. > > > @@ -617,6 +654,41 @@ vect_set_loop_masks_directly (struct loop *loop, > > loop_vec_info loop_vinfo, > > return next_mask; > > } > > > > +/* Return the iv_limit for fully masked loop LOOP with LOOP_VINFO. > > + If it is not possible to calcilate iv_limit, return -1. 
*/ > > Maybe: > > /* Decide whether it is possible to use a zero-based induction variable > when vectorizing LOOP_VINFO with a fully-masked loop. If it is, > return the value that the induction variable must be able to hold > in order to ensure that the loop ends with an all-false mask. > Return -1 otherwise. */ > > I think the function should go on in tree-vect-loop.c instead. OK. > > > +widest_int > > +vect_get_loop_iv_limit (struct loop *loop, loop_vec_info loop_vinfo) > > Maybe: vect_iv_limit_for_full_masking > > Probably worth dropping the "loop" parameter and getting it from > LOOP_VINFO. OK. > > > + > > + /* Convert skip_niters to the right type. */ > > Comment no longer applies. > > > + tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo); > > + unsigned HOST_WIDE_INT max_vf = vect_max_vf (loop_vinfo); > > + > > + /* Now calculate the value that the induction variable must be able > > + to hit in order to ensure that we end the loop with an all-false mask. > > + This involves adding the maximum number of inactive trailing scalar > > + iterations. */ > > + widest_int iv_limit = -1; > > + bool known_max_iters = max_loop_iterations (loop, &iv_limit); > > + if (known_max_iters) > > No need for this temporary variable. > > > + { > > + if (niters_skip) > > + { > > + /* Add the maximum number of skipped iterations to the > > + maximum iteration count. */ > > + if (TREE_CODE (niters_skip) == INTEGER_CST) > > + iv_limit += wi::to_widest (niters_skip); > > + else > > + iv_limit += max_vf - 1; > > + } > > Note that MASK_SKIP_NITERS isn't set at the point you call it > for vect_set_loop_condition_masked. I think we should have: > > else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)) > /* Make a conservatively-correct assumption. */ > iv_limit += max_vf - 1; OK. > > > + /* IV_LIMIT is the maximum number of latch iterations, which is also > > + the maximum in-range IV value. 
Round this value down to the previous > > + vector alignment boundary and then add an extra full iteration. */ > > + poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); > > + iv_limit = (iv_limit & -(int) known_alignment (vf)) + max_vf; > > + } > > + return iv_limit; > > +} > > + > > /* Make LOOP iterate NITERS times using masking and WHILE_ULT calls. > > LOOP_VINFO describes the vectorization of LOOP. NITERS is the > > number of iterations of the original scalar loop that should be > > [...] > > diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c > > index e1229a5..431025b 100644 > > --- a/gcc/tree-vect-loop.c > > +++ b/gcc/tree-vect-loop.c > > @@ -1056,6 +1056,16 @@ vect_verify_full_masking (loop_vec_info loop_vinfo) > > /* Find a scalar mode for which WHILE_ULT is supported. */ > > opt_scalar_int_mode cmp_mode_iter; > > tree cmp_type = NULL_TREE; > > + tree iv_type = NULL_TREE; > > + widest_int iv_limit = vect_get_loop_iv_limit (loop, loop_vinfo); > > + widest_int iv_precision = -1; > > iv_precision should be unsigned int. Setting it to UINT_MAX would > simplify the later code. OK. > > > + > > + if (iv_limit != -1) > > + iv_precision > > + = wi::min_precision (iv_limit > > + * vect_get_max_nscalars_per_iter (loop_vinfo), > > + UNSIGNED); > > + > > Would be good to avoid the duplicated call to > vect_get_max_nscalars_per_iter (also called for min_ni_width). OK. > > > FOR_EACH_MODE_IN_CLASS (cmp_mode_iter, MODE_INT) > > { > > unsigned int cmp_bits = GET_MODE_BITSIZE (cmp_mode_iter.require ()); > > @@ -1066,13 +1076,25 @@ vect_verify_full_masking (loop_vec_info loop_vinfo) > > if (this_type > > && can_produce_all_loop_masks_p (loop_vinfo, this_type)) > > { > > - /* Although we could stop as soon as we find a valid mode, > > - it's often better to continue until we hit Pmode, since the > > + /* See whether zero-based IV would ever generate all-false masks > > + before wrapping around. 
*/ > > + bool might_wrap_p = (iv_limit == -1 || (iv_precision > > > cmp_bits)); > > With the above change, the iv_limit check would no longer be needed. > > > + /* Stop as soon as we find a valid mode. If we decided to use > > + cmp_type which is less than Pmode precision, it is often > > better > > + to use iv_type corresponding to Pmode, since the > > operands to the WHILE are more likely to be reusable in > > - address calculations. */ > > + address calculations in this case. */ > > cmp_type = this_type; > > + iv_type = this_type; > > if (cmp_bits >= GET_MODE_BITSIZE (Pmode)) > > break; > > + if (!might_wrap_p) > > + { > > + iv_type > > + = build_nonstandard_integer_type (GET_MODE_BITSIZE > > (Pmode), > > + true); > > + break; > > + } > > I think the loop should break in the same place as before, with the > iv_type being what used to be the cmp_type. The new behaviour is that > (for the new meaning of cmp_type) we keep the current cmp_type if its > precision is already >= iv_precision. OK. Attached patch fixes the issues raised. Does this look better? Thanks, Kugan > > Thanks, > Richard
From 4c6d5c2aaa1c7fef8773aabf3c6f5edb37c58b68 Mon Sep 17 00:00:00 2001 From: Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org> Date: Tue, 28 May 2019 11:57:54 +1000 Subject: [PATCH] PR88838 V4 Change-Id: Ica9561d88379f472e4ec4b96aab5c7e1752f2fcc --- gcc/testsuite/gcc.target/aarch64/pr88838.c | 11 ++++ gcc/testsuite/gcc.target/aarch64/sve/while_1.c | 16 +++--- gcc/tree-vect-loop-manip.c | 52 +++++++------------ gcc/tree-vect-loop.c | 71 ++++++++++++++++++++++++-- gcc/tree-vectorizer.h | 6 +++ 5 files changed, 110 insertions(+), 46 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/pr88838.c diff --git a/gcc/testsuite/gcc.target/aarch64/pr88838.c b/gcc/testsuite/gcc.target/aarch64/pr88838.c new file mode 100644 index 0000000..d7db847 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/pr88838.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-S -O3 -march=armv8.2-a+sve" } */ + +void +f (int *restrict x, int *restrict y, int *restrict z, int n) +{ + for (int i = 0; i < n; i += 1) + x[i] = y[i] + z[i]; +} + +/* { dg-final { scan-assembler-not "sxtw" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/while_1.c b/gcc/testsuite/gcc.target/aarch64/sve/while_1.c index a93a04b..05a4860 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/while_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/while_1.c @@ -26,14 +26,14 @@ TEST_ALL (ADD_LOOP) /* { dg-final { scan-assembler-not {\tuqdec} } } */ -/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b, xzr,} 2 } } */ -/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b, x[0-9]+,} 2 } } */ -/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h, xzr,} 2 } } */ -/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h, x[0-9]+,} 2 } } */ -/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s, xzr,} 3 } } */ -/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s, x[0-9]+,} 3 } } */ -/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d, xzr,} 3 } } */ -/* { 
dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d, x[0-9]+,} 3 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b, wzr,} 2 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b, w[0-9]+,} 2 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h, wzr,} 2 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h, w[0-9]+,} 2 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s, wzr,} 3 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s, w[0-9]+,} 3 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d, wzr,} 3 } } */ +/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d, w[0-9]+,} 3 } } */ /* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.b, p[0-7]/z, \[x0, x[0-9]+\]\n} 2 } } */ /* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b, p[0-7], \[x0, x[0-9]+\]\n} 2 } } */ /* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.h, p[0-7]/z, \[x0, x[0-9]+, lsl 1\]\n} 2 } } */ diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c index b3fae5b..fa8e7f2 100644 --- a/gcc/tree-vect-loop-manip.c +++ b/gcc/tree-vect-loop-manip.c @@ -415,6 +415,7 @@ vect_set_loop_masks_directly (struct loop *loop, loop_vec_info loop_vinfo, bool might_wrap_p) { tree compare_type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo); + tree iv_type = LOOP_VINFO_MASK_IV_TYPE (loop_vinfo); tree mask_type = rgm->mask_type; unsigned int nscalars_per_iter = rgm->max_nscalars_per_iter; poly_uint64 nscalars_per_mask = TYPE_VECTOR_SUBPARTS (mask_type); @@ -445,11 +446,16 @@ vect_set_loop_masks_directly (struct loop *loop, loop_vec_info loop_vinfo, tree index_before_incr, index_after_incr; gimple_stmt_iterator incr_gsi; bool insert_after; - tree zero_index = build_int_cst (compare_type, 0); standard_iv_increment_position (loop, &incr_gsi, &insert_after); - create_iv (zero_index, nscalars_step, NULL_TREE, loop, &incr_gsi, + + tree zero_index = build_int_cst (iv_type, 0); + tree step = 
build_int_cst (iv_type, + LOOP_VINFO_VECT_FACTOR (loop_vinfo)); + /* Creating IV of iv_type. */ + create_iv (zero_index, step, NULL_TREE, loop, &incr_gsi, insert_after, &index_before_incr, &index_after_incr); + zero_index = build_int_cst (compare_type, 0); tree test_index, test_limit, first_limit; gimple_stmt_iterator *test_gsi; if (might_wrap_p) @@ -609,8 +615,14 @@ vect_set_loop_masks_directly (struct loop *loop, loop_vec_info loop_vinfo, /* Get the mask value for the next iteration of the loop. */ next_mask = make_temp_ssa_name (mask_type, NULL, "next_mask"); - gcall *call = vect_gen_while (next_mask, test_index, this_test_limit); - gsi_insert_before (test_gsi, call, GSI_SAME_STMT); + tree test_index_cmp_type = make_ssa_name (compare_type); + gimple *conv_stmt = gimple_build_assign (test_index_cmp_type, + NOP_EXPR, + test_index); + gsi_insert_before (test_gsi, conv_stmt, GSI_NEW_STMT); + gcall *call = vect_gen_while (next_mask, test_index_cmp_type, + this_test_limit); + gsi_insert_after (test_gsi, call, GSI_SAME_STMT); vect_set_loop_mask (loop, mask, init_mask, next_mask); } @@ -637,12 +649,12 @@ vect_set_loop_condition_masked (struct loop *loop, loop_vec_info loop_vinfo, tree compare_type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo); unsigned int compare_precision = TYPE_PRECISION (compare_type); - unsigned HOST_WIDE_INT max_vf = vect_max_vf (loop_vinfo); tree orig_niters = niters; /* Type of the initial value of NITERS. */ tree ni_actual_type = TREE_TYPE (niters); unsigned int ni_actual_precision = TYPE_PRECISION (ni_actual_type); + tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo); /* Convert NITERS to the same size as the compare. */ if (compare_precision > ni_actual_precision @@ -661,33 +673,7 @@ vect_set_loop_condition_masked (struct loop *loop, loop_vec_info loop_vinfo, else niters = gimple_convert (&preheader_seq, compare_type, niters); - /* Convert skip_niters to the right type. 
*/ - tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo); - - /* Now calculate the value that the induction variable must be able - to hit in order to ensure that we end the loop with an all-false mask. - This involves adding the maximum number of inactive trailing scalar - iterations. */ - widest_int iv_limit; - bool known_max_iters = max_loop_iterations (loop, &iv_limit); - if (known_max_iters) - { - if (niters_skip) - { - /* Add the maximum number of skipped iterations to the - maximum iteration count. */ - if (TREE_CODE (niters_skip) == INTEGER_CST) - iv_limit += wi::to_widest (niters_skip); - else - iv_limit += max_vf - 1; - } - /* IV_LIMIT is the maximum number of latch iterations, which is also - the maximum in-range IV value. Round this value down to the previous - vector alignment boundary and then add an extra full iteration. */ - poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); - iv_limit = (iv_limit & -(int) known_alignment (vf)) + max_vf; - } - + widest_int iv_limit = vect_iv_limit_for_full_masking (loop_vinfo); /* Get the vectorization factor in tree form. */ tree vf = build_int_cst (compare_type, LOOP_VINFO_VECT_FACTOR (loop_vinfo)); @@ -717,7 +703,7 @@ vect_set_loop_condition_masked (struct loop *loop, loop_vec_info loop_vinfo, /* See whether zero-based IV would ever generate all-false masks before wrapping around. 
*/ bool might_wrap_p - = (!known_max_iters + = (iv_limit == UINT_MAX || (wi::min_precision (iv_limit * rgm->max_nscalars_per_iter, UNSIGNED) > compare_precision)); diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 4942c69..1240037 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -1029,7 +1029,10 @@ static bool vect_verify_full_masking (loop_vec_info loop_vinfo) { struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)); unsigned int min_ni_width; + unsigned int max_nscalars_per_iter + = vect_get_max_nscalars_per_iter (loop_vinfo); /* Use a normal loop if there are no statements that need masking. This only happens in rare degenerate cases: it means that the loop @@ -1048,7 +1051,7 @@ vect_verify_full_masking (loop_vec_info loop_vinfo) max_ni = wi::smin (max_ni, max_back_edges + 1); /* Account for rgroup masks, in which each bit is replicated N times. */ - max_ni *= vect_get_max_nscalars_per_iter (loop_vinfo); + max_ni *= max_nscalars_per_iter; /* Work out how many bits we need to represent the limit. */ min_ni_width = wi::min_precision (max_ni, UNSIGNED); @@ -1056,6 +1059,14 @@ vect_verify_full_masking (loop_vec_info loop_vinfo) /* Find a scalar mode for which WHILE_ULT is supported. 
*/ opt_scalar_int_mode cmp_mode_iter; tree cmp_type = NULL_TREE; + tree iv_type = NULL_TREE; + widest_int iv_limit = vect_iv_limit_for_full_masking (loop_vinfo); + widest_int iv_precision = UINT_MAX; + + if (iv_limit != UINT_MAX) + iv_precision = wi::min_precision (iv_limit * max_nscalars_per_iter, + UNSIGNED); + FOR_EACH_MODE_IN_CLASS (cmp_mode_iter, MODE_INT) { unsigned int cmp_bits = GET_MODE_BITSIZE (cmp_mode_iter.require ()); @@ -1066,11 +1077,18 @@ vect_verify_full_masking (loop_vec_info loop_vinfo) if (this_type && can_produce_all_loop_masks_p (loop_vinfo, this_type)) { - /* Although we could stop as soon as we find a valid mode, - it's often better to continue until we hit Pmode, since the + /* See whether zero-based IV would ever generate all-false masks + before wrapping around. */ + bool might_wrap_p = (iv_precision > cmp_bits); + /* Stop as soon as we find a valid mode. If we decided to use + cmp_type which is less than Pmode precision, it is often better + to use iv_type corresponding to Pmode, since the operands to the WHILE are more likely to be reusable in - address calculations. */ - cmp_type = this_type; + address calculations in this case. */ + iv_type = this_type; + if (might_wrap_p + || (cmp_bits <= TYPE_PRECISION (niters_type))) + cmp_type = this_type; if (cmp_bits >= GET_MODE_BITSIZE (Pmode)) break; } @@ -1081,6 +1099,7 @@ vect_verify_full_masking (loop_vec_info loop_vinfo) return false; LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo) = cmp_type; + LOOP_VINFO_MASK_IV_TYPE (loop_vinfo) = iv_type; return true; } @@ -9014,3 +9033,45 @@ optimize_mask_stores (struct loop *loop) add_phi_arg (phi, gimple_vuse (last_store), e, UNKNOWN_LOCATION); } } + +/* Decide whether it is possible to use a zero-based induction variable + when vectorizing LOOP_VINFO with a fully-masked loop. If it is, + return the value that the induction variable must be able to hold + in order to ensure that the loop ends with an all-false mask. + Return -1 otherwise. 
*/ +widest_int +vect_iv_limit_for_full_masking (loop_vec_info loop_vinfo) +{ + tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo); + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + unsigned HOST_WIDE_INT max_vf = vect_max_vf (loop_vinfo); + + /* Now calculate the value that the induction variable must be able + to hit in order to ensure that we end the loop with an all-false mask. + This involves adding the maximum number of inactive trailing scalar + iterations. */ + widest_int iv_limit = -1; + if (max_loop_iterations (loop, &iv_limit)) + { + if (niters_skip) + { + /* Add the maximum number of skipped iterations to the + maximum iteration count. */ + if (TREE_CODE (niters_skip) == INTEGER_CST) + iv_limit += wi::to_widest (niters_skip); + else + iv_limit += max_vf - 1; + } + else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)) + /* Make a conservatively-correct assumption. */ + iv_limit += max_vf - 1; + + /* IV_LIMIT is the maximum number of latch iterations, which is also + the maximum in-range IV value. Round this value down to the previous + vector alignment boundary and then add an extra full iteration. */ + poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); + iv_limit = (iv_limit & -(int) known_alignment (vf)) + max_vf; + } + return iv_limit; +} + diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 4db30cc..eb0f21f 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -435,6 +435,10 @@ typedef struct _loop_vec_info : public vec_info { is false and vectorized loop otherwise. */ tree simd_if_cond; + /* Type of the IV to use in the WHILE_ULT call for fully-masked + loops. */ + tree iv_type; + /* Unknown DRs according to which loop was peeled. 
*/ struct dr_vec_info *unaligned_dr; @@ -570,6 +574,7 @@ typedef struct _loop_vec_info : public vec_info { #define LOOP_VINFO_MASKS(L) (L)->masks #define LOOP_VINFO_MASK_SKIP_NITERS(L) (L)->mask_skip_niters #define LOOP_VINFO_MASK_COMPARE_TYPE(L) (L)->mask_compare_type +#define LOOP_VINFO_MASK_IV_TYPE(L) (L)->iv_type #define LOOP_VINFO_PTR_MASK(L) (L)->ptr_mask #define LOOP_VINFO_LOOP_NEST(L) (L)->shared->loop_nest #define LOOP_VINFO_DATAREFS(L) (L)->shared->datarefs @@ -1582,6 +1587,7 @@ extern tree vect_create_addr_base_for_vector_ref (stmt_vec_info, gimple_seq *, /* FORNOW: Used in tree-parloops.c. */ extern stmt_vec_info vect_force_simple_reduction (loop_vec_info, stmt_vec_info, bool *, bool); +extern widest_int vect_iv_limit_for_full_masking (loop_vec_info loop_vinfo); /* Used in gimple-loop-interchange.c. */ extern bool check_reduction_path (dump_user_location_t, loop_p, gphi *, tree, enum tree_code); -- 2.7.4