Stage 1 is about to close (very soon). Even though there is not much new code
(basically, the new code computes the separation class option for the AST
build), I am not sure that the patch qualifies for stage 2.
Unroll-and-jam (strip mining) generates very nice code for small kernels, for
loops with either constant or non-constant bounds, and this is an argument for
the new isl-based code generator. Otherwise, I'm afraid that the generated code
looks very similar to the code generated by CLooG: an inner loop with min/max
bounds that GCC doesn't optimize further, which prevents the perceived
advantages of strip mining (register reuse and scalar reduction, instruction
scheduling, etc.).
OK for trunk?
Thanks, Mircea
Index: gcc/graphite-poly.h
===
--- gcc/graphite-poly.h (revision 217013)
+++ gcc/graphite-poly.h (working copy)
@@ -349,6 +349,9 @@
poly_scattering_p _saved;
isl_map *saved;
+ /* For tiling, the map for computing the separating class. */
+ isl_map *map_sepclass;
+
/* True when this PBB contains only a reduction statement. */
bool is_reduction;
};
Index: gcc/graphite.c
===
--- gcc/graphite.c (revision 217013)
+++ gcc/graphite.c (working copy)
@@ -383,7 +383,8 @@
|| flag_loop_strip_mine
|| flag_graphite_identity
|| flag_loop_parallelize_all
- || flag_loop_optimize_isl)
+ || flag_loop_optimize_isl
+ || flag_loop_unroll_jam)
flag_graphite = 1;
return flag_graphite != 0;
Index: gcc/common.opt
===
--- gcc/common.opt (revision 217013)
+++ gcc/common.opt (working copy)
@@ -1328,6 +1328,10 @@
Common Report Var(flag_loop_block) Optimization
Enable Loop Blocking transformation
+floop-unroll-and-jam
+Common Report Var(flag_loop_unroll_jam) Optimization
+Enable Loop Unroll Jam transformation
+
fgnu-tm
Common Report Var(flag_tm)
Enable support for GNU transactional memory
Index: gcc/graphite-optimize-isl.c
===
--- gcc/graphite-optimize-isl.c (revision 217013)
+++ gcc/graphite-optimize-isl.c (working copy)
@@ -186,7 +186,7 @@
PartialSchedule = isl_band_get_partial_schedule (Band);
*Dimensions = isl_band_n_member (Band);
- if (DisableTiling)
+ if (DisableTiling || flag_loop_unroll_jam)
return PartialSchedule;
/* It does not make any sense to tile a band with just one dimension. */
@@ -241,7 +241,9 @@
constant number of iterations, if the number of loop iterations at
DimToVectorize can be devided by VectorWidth. The default VectorWidth is
currently constant and not yet target specific. This function does not reason
- about parallelism. */
+ about parallelism.
+
+ */
static isl_map *
getPrevectorMap (isl_ctx *ctx, int DimToVectorize,
int ScheduleDimensions,
@@ -305,8 +307,98 @@
isl_constraint_set_constant_si (c, VectorWidth - 1);
TilingMap = isl_map_add_constraint (TilingMap, c);
- isl_map_dump (TilingMap);
+ return TilingMap;
+}
+/* Compute an auxiliary map to getPrevectorMap, for computing the separating
+ class defined by full tiles. Used in graphite-isl-ast-to-gimple.c to set the
+ corresponding option for the AST build.
+
+ The map (for VectorWidth=4):
+
+ [i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and it + 3 = i
+ and ip >= 0
+
+ The image of this map is the separation class. The range of this map includes
+ all the i that are multiples of 4 in the domain except the greatest one.
+
+ */
+static isl_map *
+getPrevectorMap_full (isl_ctx *ctx, int DimToVectorize,
+ int ScheduleDimensions,
+ int VectorWidth)
+{
+ isl_space *Space;
+ isl_local_space *LocalSpace, *LocalSpaceRange;
+ isl_set *Modulo;
+ isl_map *TilingMap;
+ isl_constraint *c;
+ isl_aff *Aff;
+ int PointDimension; /* ip */
+ int TileDimension; /* it */
+ isl_val *VectorWidthMP;
+ int i;
+
+ /* assert (0 <= DimToVectorize && DimToVectorize < ScheduleDimensions);*/
+
+ Space = isl_space_alloc (ctx, 0, ScheduleDimensions, ScheduleDimensions + 1);
+ TilingMap = isl_map_universe (isl_space_copy (Space));
+ LocalSpace = isl_local_space_from_space (Space);
+ PointDimension = ScheduleDimensions;
+ TileDimension = DimToVectorize;
+
+ /* Create an identity map for everything except DimToVectorize and the
+ point loop. */
+ for (i = 0; i < ScheduleDimensions; i++)
+{
+ if (i == DimToVectorize)
+continue;
+
+ c = isl_equality_alloc (isl_local_space_copy (LocalSpace));
+
+ isl_constraint_set_coefficient_si (c, isl_dim_in, i, -1);
+ isl_constraint_set_coefficient_si (c, isl_dim_out, i, 1);
+
+ TilingMap = isl_map_add_constraint (TilingMap, c);
+}
+
+ /* it % 'VectorWidth' = 0 */
+ LocalSpaceRange = isl_local_space_range (isl_local_space_copy (LocalSpace));
+