> Hi Roman,
>
> you can get this information from the isl_ast_build that was used when
> generating a certain loop (you can access this isl_ast_build from the
> callbacks isl_ast_build_set_before_each_for and
> isl_ast_build_set_after_each_for). With isl_ast_build_get_schedule, you can
> get an incomplete schedule (fewer dimensions than the schedule that you gave
> to the isl ast generator). Specifically, it only contains the dimensions of
> the current loop and all surrounding ones. Consequently the last dimension
> in this incomplete schedule is the dimension you want to check for
> parallelism.
Hi Tobias,
thank you! I've attached a patch that contains a first draft of the
check for loop parallelism.
If I'm not mistaken, the depth, which can be obtained from
isl_ast_build, is only suitable for the incomplete schedule, which can
be obtained using isl_ast_build_get_schedule. That's why the temporary
implementation works with the incomplete schedule instead of the
result from scop_get_transformed_schedule.
I have a question about vect-pr43423.c. CLooG generates the following
code from this example:
vect-pr43423.c
for (scat_1=0;scat_1<=min(mid_6-1,n_5-1);scat_1++) {
(scat_1);
(scat_1);
}
for (scat_1=max(0,mid_6);scat_1<=n_5-1;scat_1++) {
(scat_1);
(scat_1);
}
These loops can be parallelized, according to the description of pr43423:
"...
void foo(int n, int mid)
{
int i;
for(i=0; i<n; i++)
{
if (i < mid)
a[i] = a[i] + b[i];
else
a[i] = a[i] + c[i];
}
}
chfang@pathscale:~/gcc$ gcc -O3 -ftree-vectorizer-verbose=7 -c foo.c
foo.c:6: note: not vectorized: control flow in loop.
foo.c:3: note: vectorized 0 loops in function.
This loop can be vectorized by icc.
For this case, I would expect to see two loops with iteration range
of [0, mid) and [mid, n). Then both loops can be vectorized.
..."
and the code of vect-pr43423.c:
/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
ISL generates the following code:
for (int c1 = 0; c1 < n; c1 += 1) {
if (mid >= c1 + 1) {
S_6(c1);
} else
S_7(c1);
S_8(c1);
}
I think it can't be parallelized. Maybe this example is no longer
suitable. What do you think about this?
--
Cheers, Roman Gareev.
Index: gcc/graphite-isl-ast-to-gimple.c
===================================================================
--- gcc/graphite-isl-ast-to-gimple.c (revision 213262)
+++ gcc/graphite-isl-ast-to-gimple.c (working copy)
@@ -435,7 +435,14 @@
redirect_edge_succ_nodup (next_e, after);
set_immediate_dominator (CDI_DOMINATORS, next_e->dest, next_e->src);
- /* TODO: Add checking for the loop parallelism. */
+ if (flag_loop_parallelize_all)
+ {
+ isl_id *id = isl_ast_node_get_annotation (node_for);
+ gcc_assert (id);
+ if (isl_id_get_user (id) != NULL)
+ loop->can_be_parallel = true;
+ isl_id_free (id);
+ }
return last_e;
}
@@ -834,6 +841,97 @@
return schedule_isl;
}
+/* Apply SCHEDULE, after extend_schedule, to both the domain and the range of
+ DEPS. Return the result as one isl_map, or NULL if that relation is empty. */
+
+static __isl_give isl_map *
+apply_schedule_on_deps (__isl_keep isl_union_map *schedule,
+ __isl_keep isl_union_map *deps)
+{
+ isl_map *x;
+ isl_union_map *ux, *trans;
+
+ trans = isl_union_map_copy (schedule); /* Work on a copy; SCHEDULE is __isl_keep. */
+ trans = extend_schedule (trans);
+ ux = isl_union_map_copy (deps); /* Likewise for DEPS. */
+ ux = isl_union_map_apply_domain (ux, isl_union_map_copy (trans)); /* Copy TRANS: it is used again below. */
+ ux = isl_union_map_apply_range (ux, trans); /* Last use of TRANS in this function. */
+ if (isl_union_map_is_empty (ux))
+ {
+ isl_union_map_free (ux); /* Nothing is returned, so release UX here. */
+ return NULL;
+ }
+ x = isl_map_from_union_map (ux);
+
+ return x;
+}
+
+/* Return true when DEPS is non-empty and the intersection of LEX with
+ DEPS transformed by SCHEDULE is non-empty. LEX is the relation in
+ which all dimensions before DEPTH are equal in the input and the output,
+ and the input at DEPTH (index DEPTH - 1) is at least one less than the output. */
+
+static bool
+carries_deps (__isl_keep isl_union_map *schedule,
+ __isl_keep isl_union_map *deps,
+ int depth)
+{
+ bool res;
+ int i;
+ isl_space *space;
+ isl_map *lex, *x;
+ isl_constraint *ineq;
+
+ if (isl_union_map_is_empty (deps)) /* No dependences at all. */
+ return false;
+
+ x = apply_schedule_on_deps (schedule, deps);
+ if (x == NULL) /* The transformed relation was empty. */
+ return false;
+ space = isl_map_get_space (x);
+ space = isl_space_range (space);
+ lex = isl_map_lex_le (space); /* Built on the range space of X. */
+ space = isl_map_get_space (x);
+ ineq = isl_inequality_alloc (isl_local_space_from_space (space));
+
+ for (i = 0; i < depth - 1; i++) /* Indices 0 .. DEPTH - 2 must coincide. */
+ lex = isl_map_equate (lex, isl_dim_in, i, isl_dim_out, i);
+
+ /* in + 1 <= out */
+ ineq = isl_constraint_set_coefficient_si (ineq, isl_dim_out, depth - 1, 1);
+ ineq = isl_constraint_set_coefficient_si (ineq, isl_dim_in, depth - 1, -1);
+ ineq = isl_constraint_set_constant_si (ineq, -1);
+ lex = isl_map_add_constraint (lex, ineq);
+ x = isl_map_intersect (x, lex); /* Keep only the dependences that match LEX. */
+ res = !isl_map_is_empty (x); /* Anything left is carried at DEPTH. */
+
+ isl_map_free (x);
+ return res;
+}
+
+/* Executed before the construction of a for node; USER is the scop. */
+static __isl_give isl_id *
+ast_build_before_for (__isl_keep isl_ast_build *build, void *user)
+{
+ scop_p scop = (scop_p) user;
+ isl_ast_build *pointer = NULL; /* Stays NULL when some dependence is carried. */
+ isl_union_map *schedule = isl_ast_build_get_schedule (build);
+ isl_space *schedule_space = isl_ast_build_get_schedule_space (build);
+ int dimension = isl_space_dim (schedule_space, isl_dim_out) - 1; /* NOTE(review): carries_deps indexes DEPTH - 1 itself; confirm this extra - 1 is intended. */
+ int res = (carries_deps (schedule, scop->must_raw, dimension) /* Nonzero if any of the six relations is carried. */
+ || carries_deps (schedule, scop->may_raw, dimension)
+ || carries_deps (schedule, scop->must_war, dimension)
+ || carries_deps (schedule, scop->may_war, dimension)
+ || carries_deps (schedule, scop->must_waw, dimension)
+ || carries_deps (schedule, scop->may_waw, dimension));
+ if (!res)
+ pointer = build; /* Non-NULL user: no dependence is carried. */
+ isl_union_map_free (schedule);
+ isl_space_free (schedule_space);
+ isl_id *id = isl_id_alloc (isl_ast_build_get_ctx (build), "", pointer); /* Empty name; POINTER is the user field. */
+ return id;
+}
+
static __isl_give isl_ast_node *
scop_to_isl_ast (scop_p scop, ivs_params &ip)
{
@@ -846,6 +944,32 @@
add_parameters_to_ivs_params (scop, ip);
isl_union_map *schedule_isl = generate_isl_schedule (scop);
isl_ast_build *context_isl = generate_isl_context (scop);
+ if (flag_loop_parallelize_all)
+ {
+ if (!scop->must_raw &&
+ !scop->may_raw &&
+ !scop->must_raw_no_source &&
+ !scop->may_raw_no_source &&
+ !scop->must_war &&
+ !scop->may_war &&
+ !scop->must_war_no_source &&
+ !scop->may_war_no_source &&
+ !scop->must_waw &&
+ !scop->may_waw &&
+ !scop->must_waw_no_source &&
+ !scop->may_waw_no_source)
+ compute_deps (scop, SCOP_BBS (scop),
+ &scop->must_raw, &scop->may_raw,
+ &scop->must_raw_no_source, &scop->may_raw_no_source,
+ &scop->must_war, &scop->may_war,
+ &scop->must_war_no_source, &scop->may_war_no_source,
+ &scop->must_waw, &scop->may_waw,
+ &scop->must_waw_no_source, &scop->may_waw_no_source);
+
+ context_isl =
+ isl_ast_build_set_before_each_for (context_isl, ast_build_before_for,
+ scop);
+ }
isl_ast_node *ast_isl = isl_ast_build_ast_from_schedule (context_isl,
schedule_isl);
isl_ast_build_free (context_isl);