This fixes a tree-sharing ICE. It seems as if all unshare_expr
I added were required in this case. The first long testcase is
based on the real testcase from the OpenACC testsuite, the second
one is what reduction produced - but I thought some nested reduction
might be interesting as well; hence, I included both tests.
Bootstrapped and regtested on x86-64-gnu-linux w/o offloading.
OK for mainline and GCC 12?
(It gives an ICE with GCC 10 but not with GCC 9; thus,
more regression-fix backporting would be possible,
if someone cares.)
Tobias
-----------------
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht
München, HRB 106955
OpenACC: Fix reduction tree-sharing issue [PR106982]
The tree for var == incoming == outgound was
'MEM <double[5]> [(double *)&reduced]' which caused the ICE
"incorrect sharing of tree nodes".
PR middle-end/106982
gcc/ChangeLog:
* omp-low.cc (lower_oacc_reductions): Add some unshare_expr.
gcc/testsuite/ChangeLog:
* c-c++-common/goacc/reduction-7.c: New test.
* c-c++-common/goacc/reduction-8.c: New test.
gcc/omp-low.cc | 17 ++++++++++++-----
gcc/testsuite/c-c++-common/goacc/reduction-7.c | 22 ++++++++++++++++++++++
gcc/testsuite/c-c++-common/goacc/reduction-8.c | 12 ++++++++++++
3 files changed, 46 insertions(+), 5 deletions(-)
diff --git a/gcc/omp-low.cc b/gcc/omp-low.cc
index f0469d20b3d..8e07fb5d8a8 100644
--- a/gcc/omp-low.cc
+++ b/gcc/omp-low.cc
@@ -7631,7 +7631,12 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
incoming = build_simple_mem_ref (incoming);
}
else
- v1 = v2 = v3 = var;
+ {
+ v1 = unshare_expr (var);
+ v2 = unshare_expr (var);
+ v3 = unshare_expr (var);
+ outgoing = unshare_expr (outgoing);
+ }
/* Determine position in reduction buffer, which may be used
by target. The parser has ensured that this is not a
@@ -7659,21 +7664,23 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
= build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION,
TREE_TYPE (var), 6, setup_code,
unshare_expr (ref_to_res),
- incoming, level, op, off);
+ unshare_expr (incoming), level,
+ op, off);
tree init_call
= build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION,
TREE_TYPE (var), 6, init_code,
unshare_expr (ref_to_res),
- v1, level, op, off);
+ unshare_expr (v1), level, op, off);
tree fini_call
= build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION,
TREE_TYPE (var), 6, fini_code,
unshare_expr (ref_to_res),
- v2, level, op, off);
+ unshare_expr (v2), level, op, off);
tree teardown_call
= build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION,
TREE_TYPE (var), 6, teardown_code,
- ref_to_res, v3, level, op, off);
+ ref_to_res, unshare_expr (v3),
+ level, op, off);
gimplify_assign (v1, setup_call, &before_fork);
gimplify_assign (v2, init_call, &after_fork);
diff --git a/gcc/testsuite/c-c++-common/goacc/reduction-7.c b/gcc/testsuite/c-c++-common/goacc/reduction-7.c
new file mode 100644
index 00000000000..482b0ab1984
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/reduction-7.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+
+/* PR middle-end/106982 */
+
+long long n = 100;
+int multiplicitive_n = 128;
+
+void test1(double *rand, double *a, double *b, double *c)
+{
+#pragma acc data copyin(a[0:10*multiplicitive_n], b[0:10*multiplicitive_n]) copyout(c[0:10])
+ {
+#pragma acc parallel loop
+ for (int i = 0; i < 10; ++i)
+ {
+ double temp = 1.0;
+#pragma acc loop vector reduction(*:temp)
+ for (int j = 0; j < multiplicitive_n; ++j)
+ temp *= a[(i * multiplicitive_n) + j] + b[(i * multiplicitive_n) + j];
+ c[i] = temp;
+ }
+ }
+}
diff --git a/gcc/testsuite/c-c++-common/goacc/reduction-8.c b/gcc/testsuite/c-c++-common/goacc/reduction-8.c
new file mode 100644
index 00000000000..2c3ed499d5b
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/reduction-8.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+
+/* PR middle-end/106982 */
+
+void test1(double *c)
+{
+ double reduced[5];
+#pragma acc parallel loop gang private(reduced)
+ for (int x = 0; x < 5; ++x)
+#pragma acc loop worker reduction(*:reduced)
+ for (int y = 0; y < 5; ++y) { }
+}