On Fri, Dec 1, 2017 at 12:31 PM, Richard Biener <rguent...@suse.de> wrote: > > This is the access stride computation change. Apart from the > stride extraction I adjusted the cost model to handle non-constant > strides by checking if either is a multiple of the other and > simply fail interchanging if it's the wrong way around for one > ref or if the simple method using multiple_of_p fails to determine > either case. > > This still handles the bwaves case. > > I think we want additional testcases with variable strides for each > case we add - I believe this is the most conservative way to treat > variable strides. > > It may be inconsistent with the constant stride handling where you > allow for many OK DRs to outweight a few not OK DRs, but as it > worked for bwaves it must be good enough ;) > > Tested on x86_64-unknown-linux-gnu (just the interchange testcases). > > Currently running a bootstrap with -O3 -g -floop-interchange. > > Ok for the branch? Ok. This actually is closer the motivation: simple/conservative cost model that only transforms code when it's known to be good. I will check the impact on the number of interchange in spec.
Thanks, bin > > Richard. > > 2017-12-01 Richard Biener <rguent...@suse.de> > > * gimple-loop-interchange.cc (estimate_val_by_simplify_replace): > Remove. > (compute_access_stride): Rewrite using instantiate_scev, > remove constant substitution. > (should_interchange_loops): Adjust for non-constant strides. > > Index: gcc/gimple-loop-interchange.cc > =================================================================== > --- gcc/gimple-loop-interchange.cc (revision 255303) > +++ gcc/gimple-loop-interchange.cc (working copy) > @@ -1325,42 +1325,6 @@ tree_loop_interchange::move_code_to_inne > } > } > > -/* Estimate and return the value of EXPR by replacing variables in EXPR > - with CST_TREE and simplifying. */ > - > -static tree > -estimate_val_by_simplify_replace (tree expr, tree cst_tree) > -{ > - unsigned i, n; > - tree ret = NULL_TREE, e, se; > - > - if (!expr) > - return NULL_TREE; > - > - /* Do not bother to replace constants. */ > - if (CONSTANT_CLASS_P (expr)) > - return expr; > - > - if (!EXPR_P (expr)) > - return cst_tree; > - > - n = TREE_OPERAND_LENGTH (expr); > - for (i = 0; i < n; i++) > - { > - e = TREE_OPERAND (expr, i); > - se = estimate_val_by_simplify_replace (e, cst_tree); > - if (e == se) > - continue; > - > - if (!ret) > - ret = copy_node (expr); > - > - TREE_OPERAND (ret, i) = se; > - } > - > - return (ret ? fold (ret) : expr); > -} > - > /* Given data reference DR in LOOP_NEST, the function computes DR's access > stride at each level of loop from innermost LOOP to outer. On success, > it saves access stride at each level loop in a vector which is pointed > @@ -1388,44 +1352,31 @@ compute_access_stride (struct loop *loop > > tree ref = DR_REF (dr); > tree scev_base = build_fold_addr_expr (ref); > - tree access_size = TYPE_SIZE_UNIT (TREE_TYPE (ref)); > - tree niters = build_int_cst (sizetype, AVG_LOOP_NITER); > - access_size = fold_build2 (MULT_EXPR, sizetype, niters, access_size); > - > - do { > - tree scev_fn = analyze_scalar_evolution (loop, scev_base); > - if (chrec_contains_undetermined (scev_fn) > - || chrec_contains_symbols_defined_in_loop (scev_fn, loop->num)) > - break; > - > - if (TREE_CODE (scev_fn) != POLYNOMIAL_CHREC) > - { > - scev_base = scev_fn; > - strides->safe_push (build_int_cst (sizetype, 0)); > - continue; > - } > - > - scev_base = CHREC_LEFT (scev_fn); > - if (tree_contains_chrecs (scev_base, NULL)) > - break; > - > - tree scev_step = fold_convert (sizetype, CHREC_RIGHT (scev_fn)); > - > - enum ev_direction scev_dir = scev_direction (scev_fn); > - /* Estimate if step isn't constant. */ > - if (scev_dir == EV_DIR_UNKNOWN) > - { > - scev_step = estimate_val_by_simplify_replace (scev_step, niters); > - if (TREE_CODE (scev_step) != INTEGER_CST > - || tree_int_cst_lt (scev_step, access_size)) > - scev_step = access_size; > - } > - /* Compute absolute value of scev step. */ > - else if (scev_dir == EV_DIR_DECREASES) > - scev_step = fold_build1 (NEGATE_EXPR, sizetype, scev_step); > - > - strides->safe_push (scev_step); > - } while (loop != loop_nest && (loop = loop_outer (loop)) != NULL); > + tree scev = analyze_scalar_evolution (loop, scev_base); > + scev = instantiate_scev (loop_preheader_edge (loop_nest), loop, scev); > + if (! chrec_contains_undetermined (scev)) > + { > + tree sl = scev; > + struct loop *expected = loop; > + while (TREE_CODE (sl) == POLYNOMIAL_CHREC) > + { > + struct loop *sl_loop = get_chrec_loop (sl); > + while (sl_loop != expected) > + { > + strides->safe_push (size_int (0)); > + expected = loop_outer (expected); > + } > + strides->safe_push (CHREC_RIGHT (sl)); > + sl = CHREC_LEFT (sl); > + expected = loop_outer (expected); > + } > + if (! tree_contains_chrecs (sl, NULL)) > + while (expected != loop_outer (loop_nest)) > + { > + strides->safe_push (size_int (0)); > + expected = loop_outer (expected); > + } > + } > > dr->aux = strides; > } > @@ -1538,6 +1489,9 @@ should_interchange_loops (unsigned i_idx > struct data_reference *dr; > bool all_seq_dr_before_p = true, all_seq_dr_after_p = true; > widest_int iloop_strides = 0, oloop_strides = 0; > + unsigned num_unresolved_drs = 0; > + unsigned num_resolved_ok_drs = 0; > + unsigned num_resolved_not_ok_drs = 0; > > if (dump_info_p && dump_file && (dump_flags & TDF_DETAILS)) > fprintf (dump_file, "\nData ref strides:\n\tmem_ref:\t\tiloop\toloop\n"); > @@ -1546,28 +1500,42 @@ should_interchange_loops (unsigned i_idx > { > vec<tree> *stride = DR_ACCESS_STRIDE (dr); > tree iloop_stride = (*stride)[i_idx], oloop_stride = (*stride)[o_idx]; > - gcc_assert (TREE_CODE (iloop_stride) == INTEGER_CST); > - gcc_assert (TREE_CODE (oloop_stride) == INTEGER_CST); > > bool subloop_stride_p = false; > /* Data ref can't be invariant or sequential access at current loop if > its address changes with respect to any subloops. */ > for (j = i_idx + 1; j < stride->length (); ++j) > - if (integer_nonzerop ((*stride)[j])) > + if (!integer_zerop ((*stride)[j])) > { > subloop_stride_p = true; > break; > } > > - if (integer_nonzerop (iloop_stride)) > - iloop_strides = wi::add (iloop_strides, wi::to_widest (iloop_stride)); > - else if (!subloop_stride_p) > - num_old_inv_drs++; > - > - if (integer_nonzerop (oloop_stride)) > - oloop_strides = wi::add (oloop_strides, wi::to_widest (oloop_stride)); > - else if (!subloop_stride_p) > - num_new_inv_drs++; > + if (integer_zerop (iloop_stride)) > + { > + if (!subloop_stride_p) > + num_old_inv_drs++; > + } > + if (integer_zerop (oloop_stride)) > + { > + if (!subloop_stride_p) > + num_new_inv_drs++; > + } > + > + if (TREE_CODE (iloop_stride) == INTEGER_CST > + && TREE_CODE (oloop_stride) == INTEGER_CST) > + { > + iloop_strides = wi::add (iloop_strides, wi::to_widest > (iloop_stride)); > + oloop_strides = wi::add (oloop_strides, wi::to_widest > (oloop_stride)); > + } > + else if (multiple_of_p (TREE_TYPE (iloop_stride), > + iloop_stride, oloop_stride)) > + num_resolved_ok_drs++; > + else if (multiple_of_p (TREE_TYPE (iloop_stride), > + oloop_stride, iloop_stride)) > + num_resolved_not_ok_drs++; > + else > + num_unresolved_drs++; > > /* Data ref can't be sequential access if its address changes in sub > loop. */ > @@ -1581,10 +1549,12 @@ should_interchange_loops (unsigned i_idx > interchange. Note invariant is considered sequential here. */ > tree access_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))); > if (all_seq_dr_before_p > - && wi::gtu_p (wi::to_wide (iloop_stride), wi::to_wide > (access_size))) > + && ! (integer_zerop (iloop_stride) > + || operand_equal_p (access_size, iloop_stride, 0))) > all_seq_dr_before_p = false; > if (all_seq_dr_after_p > - && wi::gtu_p (wi::to_wide (oloop_stride), wi::to_wide > (access_size))) > + && ! (integer_zerop (oloop_stride) > + || operand_equal_p (access_size, oloop_stride, 0))) > all_seq_dr_after_p = false; > } > > @@ -1601,8 +1571,17 @@ should_interchange_loops (unsigned i_idx > fprintf (dump_file, "All consecutive stride: before(%s), after(%s)\n", > all_seq_dr_before_p ? "true" : "false", > all_seq_dr_after_p ? "true" : "false"); > + fprintf (dump_file, "OK to interchage with variable strides: %d\n", > + num_resolved_ok_drs); > + fprintf (dump_file, "Not OK to interchage with variable strides: %d\n", > + num_resolved_not_ok_drs); > + fprintf (dump_file, "Variable strides we cannot decide: %d\n", > + num_unresolved_drs); > } > > + if (num_unresolved_drs != 0 || num_resolved_not_ok_drs != 0) > + return false; > + > /* We use different stride comparison ratio for interchanging innermost > two loops or not. The idea is to be conservative in interchange for > the innermost loops. */