The code I wrote to determine partitions on auto loops was too complicated,
making it hard for me to figure out the changes needed for tiling. I've
committed this patch, which doesn't do such egregious bit twiddling.
nathan
2016-09-30 Nathan Sidwell <nat...@codesourcery.com>
* omp-low.c (oacc_loop_auto_partitions): Simplify logic.
Index: omp-low.c
===================================================================
--- omp-low.c (revision 240664)
+++ omp-low.c (working copy)
@@ -19867,15 +19867,19 @@ oacc_loop_auto_partitions (oacc_loop *lo
{
/* Allocate outermost and non-innermost loops at the outermost
non-innermost available level. */
- unsigned this_mask = outer_mask + 1;
+ unsigned this_mask = GOMP_DIM_MASK (GOMP_DIM_GANG);
+
+ /* Find the first outermost available partition. */
+ while (this_mask <= outer_mask)
+ this_mask <<= 1;
+
+ /* Prohibit the innermost partitioning at the moment. */
+ this_mask &= GOMP_DIM_MASK (GOMP_DIM_MAX - 1) - 1;
- /* Make sure it's the single outermost available partition. */
- while (this_mask != (this_mask & -this_mask))
- this_mask += this_mask & -this_mask;
-
- if (!(this_mask & (loop->inner | GOMP_DIM_MASK (GOMP_DIM_MAX)
- | GOMP_DIM_MASK (GOMP_DIM_MAX - 1))))
- loop->mask = this_mask;
+ /* Don't use any dimension explicitly claimed by an inner loop. */
+ this_mask &= ~loop->inner;
+
+ loop->mask = this_mask;
}
if (loop->child)
@@ -19897,15 +19901,13 @@ oacc_loop_auto_partitions (oacc_loop *lo
/* Pick the partitioning just inside that one. */
this_mask >>= 1;
-
/* And avoid picking one use by an outer loop. */
this_mask &= ~outer_mask;
- if (!this_mask && !loop->mask && noisy)
+ loop->mask |= this_mask;
+ if (!loop->mask && noisy)
warning_at (loop->loc, 0,
"insufficient partitioning available to parallelize loop");
-
- loop->mask |= this_mask;
}
if (assign && dump_file)