[ was: Re: [PR65637][PATCH][3/3] Handle 2 preds for fin_bb in
expand_omp_for_static_chunk ]
On 15/04/15 15:23, Tom de Vries wrote:
On 15-04-15 15:10, Tom de Vries wrote:
Hi,
This patch series fixes PR65637.
<SNIP>
This patch fixes compilation of autopar/reduc-3.c in
expand_omp_for_static_chunk.
We encounter two situations in expand_omp_for_static_chunk:
1. single_pred_p (fin_bb)
This situation happens, for instance, for autopar-1.c, which uses a
compile-time constant loop bound.
2. !single_pred_p (fin_bb)
This situation happens for autopar/reduc-3.c, whose loop bound is not known
at compile time.
The two situations are represented as control flow graphs here:
...
1.
               x
               |
               |
               *
            entry_bb
               |
               |
               *
         iter_part_bb --* seq_start_bb
            |     *            |
            |      \          ...
            *       \          |
         fin_bb      \         *
            |         -- trip_update_bb
            |
            *
            x
2.
               x
               |
               |
               *
         region.entry --* entry_bb
               |             |
               |             |
               *             *
            fin_bb *-- iter_part_bb --* seq_start_bb
               |               *             |
               |                \           ...
               *                 \           |
               x                  \          *
                                   -- trip_update_bb
...
This patch handles the !single_pred_p (fin_bb) scenario, while keeping the
single_pred_p (fin_bb) scenario undisturbed.
With the patch, the resulting split-off function looks like this:
...
main1._loopfn.0 (voidD.41 * .paral_data_paramD.2498)
{
;; basic block 2, loop depth 0, count 0, freq 79, maybe hot
;; prev block 0, next block 3, flags: (NEW, REACHABLE)
;; pred: ENTRY (FALLTHRU)
.paral_data_param_2 = .paral_data_param_1(D);
.paral_data_load.12_3 = (struct *) .paral_data_param_2;
# VUSE <.MEM_33(D)>
_4 = .paral_data_load.12_3->D.2490;
# VUSE <.MEM_33(D)>
ub_5 = .paral_data_load.12_3->ubD.2491;
# VUSE <.MEM_33(D)>
uc_6 = .paral_data_load.12_3->ucD.2492;
if (0 < _4)
goto <bb 4>;
else
goto <bb 3>;
;; succ: 4 [100.0%] (TRUE_VALUE)
;; 3 [0.0%] (FALSE_VALUE)
;; basic block 3, loop depth 0, count 0, freq 0, maybe hot
;; prev block 2, next block 4, flags: (NEW, REACHABLE)
;; pred: 2 [0.0%] (FALSE_VALUE)
;; 5 (FALSE_VALUE)
# udiff.8_7 = PHI <0(2), udiff.8_8(5)>
_9 = &.paral_data_load.12_3->udiff.8D.2493;
# .MEM_34 = VDEF <.MEM_33(D)>
# USE = anything
# CLB = anything
__atomic_fetch_add_4D.1247 (_9, udiff.8_7, 0);
# VUSE <.MEM_34>
return;
;; succ: EXIT
;; basic block 4, loop depth 0, count 0, freq 79, maybe hot
;; prev block 3, next block 5, flags: (NEW, REACHABLE)
;; pred: 2 [100.0%] (TRUE_VALUE)
_10 = omp_get_num_threadsD.1287 ();
_11 = (unsigned int) _10;
_12 = omp_get_thread_numD.1286 ();
_13 = (unsigned int) _12;
.trip.13_14 = 0;
;; succ: 5 [100.0%] (FALLTHRU)
;; basic block 5, loop depth 1, count 0, freq 79, maybe hot
;; prev block 4, next block 6, flags: (NEW, REACHABLE)
;; pred: 4 [100.0%] (FALLTHRU)
;; 8 [100.0%] (FALLTHRU)
# udiff.8_8 = PHI <0(4), udiff.8_15(8)>
# .trip.13_16 = PHI <.trip.13_14(4), .trip.13_17(8)>
_18 = _11 * .trip.13_16;
_19 = _13 + _18;
_20 = _19 + 1;
_21 = MIN_EXPR <_4, _20>;
if (_19 < _4)
goto <bb 6>;
else
goto <bb 3>;
;; succ: 6 [100.0%] (TRUE_VALUE)
;; 3 (FALSE_VALUE)
;; basic block 6, loop depth 1, count 0, freq 79, maybe hot
;; prev block 5, next block 7, flags: (NEW, REACHABLE)
;; pred: 5 [100.0%] (TRUE_VALUE)
ivtmp_22 = _19;
;; succ: 7 [100.0%] (FALLTHRU)
;; basic block 7, loop depth 2, count 0, freq 7920, maybe hot
;; prev block 6, next block 8, flags: (NEW, REACHABLE)
;; pred: 6 [100.0%] (FALLTHRU)
;; 7 [100.0%] (TRUE_VALUE)
# udiff.8_23 = PHI <udiff.8_8(6), udiff.8_15(7)>
# ivtmp_24 = PHI <ivtmp_22(6), ivtmp_25(7)>
i.9_28 = (intD.6) ivtmp_24;
# VUSE <.MEM_33(D)>
_29 = *ub_5[i.9_28];
# VUSE <.MEM_33(D)>
_30 = *uc_6[i.9_28];
_31 = _29 - _30;
udiff.8_15 = udiff.8_23 + _31;
i.9_32 = i.9_28 + 1;
ivtmp_25 = ivtmp_24 + 1;
if (ivtmp_25 < _21)
goto <bb 7>;
else
goto <bb 8>;
;; succ: 7 [100.0%] (TRUE_VALUE)
;; 8 (FALSE_VALUE)
;; basic block 8, loop depth 1, count 0, freq 0, maybe hot
;; prev block 7, next block 1, flags: (NEW, REACHABLE)
;; pred: 7 (FALSE_VALUE)
.trip.13_17 = .trip.13_16 + 1;
goto <bb 5>;
;; succ: 5 [100.0%] (FALLTHRU)
}
...
OK for trunk?
This updated patch includes a test-case.
Thanks,
- Tom
Handle 2 preds for fin_bb in expand_omp_for_static_chunk
2015-08-31 Tom de Vries <t...@codesourcery.com>
PR tree-optimization/65637
* omp-low.c (expand_omp_for_static_chunk): Handle case that fin_bb has 2
predecessors.
* gcc.dg/autopar/reduc-3-chunk-size.c: New test.
---
gcc/ChangeLog | 6 +++
gcc/omp-low.c | 26 +++++++----
gcc/testsuite/gcc.dg/autopar/reduc-3-chunk-size.c | 56 +++++++++++++++++++++++
3 files changed, 79 insertions(+), 9 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/autopar/reduc-3-chunk-size.c
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index a14564c..c9e426f 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,6 +1,12 @@
2015-05-18 Tom de Vries <t...@codesourcery.com>
PR tree-optimization/65637
+ * omp-low.c (expand_omp_for_static_chunk): Handle case that fin_bb has 2
+ predecessors.
+
+2015-05-18 Tom de Vries <t...@codesourcery.com>
+
+ PR tree-optimization/65637
* omp-low.c (find_phi_with_arg_on_edge): New function.
(expand_omp_for_static_chunk): Fix inner loop phi.
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index e2be7c7..f3257ac 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -7033,7 +7033,7 @@ expand_omp_for_static_chunk (struct omp_region *region,
se->probability = REG_BR_PROB_BASE / 2000 - 1;
if (gimple_in_ssa_p (cfun))
{
- int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
+ int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
for (gphi_iterator gpi = gsi_start_phis (fin_bb);
!gsi_end_p (gpi); gsi_next (&gpi))
{
@@ -7314,7 +7314,7 @@ expand_omp_for_static_chunk (struct omp_region *region,
/* When we redirect the edge from trip_update_bb to iter_part_bb, we
remove arguments of the phi nodes in fin_bb. We need to create
appropriate phi nodes in iter_part_bb instead. */
- se = single_pred_edge (fin_bb);
+ se = find_edge (iter_part_bb, fin_bb);
re = single_succ_edge (trip_update_bb);
vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
ene = single_succ_edge (entry_bb);
@@ -7329,6 +7329,10 @@ expand_omp_for_static_chunk (struct omp_region *region,
phi = psi.phi ();
t = gimple_phi_result (phi);
gcc_assert (t == redirect_edge_var_map_result (vm));
+
+ if (!single_pred_p (fin_bb))
+ t = copy_ssa_name (t, phi);
+
nphi = create_phi_node (t, iter_part_bb);
t = PHI_ARG_DEF_FROM_EDGE (phi, se);
@@ -7353,16 +7357,20 @@ expand_omp_for_static_chunk (struct omp_region *region,
gcc_assert (inner_loop_phi != NULL);
add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
find_edge (seq_start_bb, body_bb), locus);
+
+ if (!single_pred_p (fin_bb))
+ add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
}
gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
redirect_edge_var_map_clear (re);
- while (1)
- {
- psi = gsi_start_phis (fin_bb);
- if (gsi_end_p (psi))
- break;
- remove_phi_node (&psi, false);
- }
+ if (single_pred_p (fin_bb))
+ while (1)
+ {
+ psi = gsi_start_phis (fin_bb);
+ if (gsi_end_p (psi))
+ break;
+ remove_phi_node (&psi, false);
+ }
/* Make phi node for trip. */
phi = create_phi_node (trip_main, iter_part_bb);
diff --git a/gcc/testsuite/gcc.dg/autopar/reduc-3-chunk-size.c b/gcc/testsuite/gcc.dg/autopar/reduc-3-chunk-size.c
new file mode 100644
index 0000000..ca4ab2f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/autopar/reduc-3-chunk-size.c
@@ -0,0 +1,56 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-parallelize-loops=4 -fdump-tree-parloops-details -fdump-tree-optimized --param parloops-chunk-size=100" } */
+
+#include <stdarg.h>
+#include <stdlib.h>
+
+#define N 1600
+
+unsigned int ub[N];
+unsigned int uc[N];
+
+/* Reduction of unsigned-int. */
+
+int __attribute__ ((noinline))
+main1 (int n, int res)
+{
+ int i;
+ unsigned int udiff;
+
+ udiff = 0;
+ for (i = 0; i < n; i++) {
+ udiff += (ub[i] - uc[i]);
+ }
+
+ /* check results: */
+ if (udiff != res)
+ abort ();
+
+ return 0;
+}
+
+void __attribute__((noinline))
+init_arrays ()
+{
+ int i;
+
+ for (i = 0; i < N; i++)
+ {
+ ub[i] = i * 3;
+ uc[i] = i;
+ }
+}
+
+int
+main (void)
+{
+ init_arrays ();
+ main1 (N, 2558400);
+ main1 (N-1, 2555202);
+ return 0;
+}
+
+
+/* { dg-final { scan-tree-dump-times "Detected reduction" 1 "parloops" } } */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops" } } */
+
--
1.9.1