Hi,
I noticed that there is only one asyncwait testcase for C on trunk.
I've rewritten asyncwait-{1,2,3}.f90 into C (and changed the float math
into int math to keep things as simple as possible).
Tested on top of trunk for host.
Tested on top of trunk, gcc-7-branch, openacc-gcc-7-branch, and
gomp-4-branch for nvptx.
On trunk for nvptx, I'm seeing execution failures at -O2. I've verified
that the same failures occur with all the async and wait clauses
removed. These are also not the only -O2 failures on trunk, so they are
probably caused by some orthogonal issue.
Committed as obvious.
Thanks,
- Tom
Add libgomp.oacc-c-c++-common/f-asyncwait-{1,2,3}.c
2017-11-15 Tom de Vries <t...@codesourcery.com>
* testsuite/libgomp.oacc-c-c++-common/f-asyncwait-1.c: New test, copied
from asyncwait-1.f90. Rewrite into C. Rewrite from float to int.
* testsuite/libgomp.oacc-c-c++-common/f-asyncwait-2.c: New test, copied
from asyncwait-2.f90. Rewrite into C. Rewrite from float to int.
* testsuite/libgomp.oacc-c-c++-common/f-asyncwait-3.c: New test, copied
from asyncwait-3.f90. Rewrite into C. Rewrite from float to int.
---
.../libgomp.oacc-c-c++-common/f-asyncwait-1.c | 297 +++++++++++++++++++++
.../libgomp.oacc-c-c++-common/f-asyncwait-2.c | 61 +++++
.../libgomp.oacc-c-c++-common/f-asyncwait-3.c | 63 +++++
3 files changed, 421 insertions(+)
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/f-asyncwait-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/f-asyncwait-1.c
new file mode 100644
index 0000000..cf85170
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/f-asyncwait-1.c
@@ -0,0 +1,297 @@
+/* { dg-do run } */
+
+/* Based on asyncwait-1.f90. */
+
+#include <stdlib.h>
+
+#define N 64
+/* Run async/wait cases on parallel and kernels constructs; abort on mismatch.  */
+int
+main (void)
+{
+ int *a, *b, *c, *d, *e;
+ /* Five host arrays of N ints each; the malloc results are not checked.  */
+ a = (int*)malloc (N * sizeof (*a));
+ b = (int*)malloc (N * sizeof (*b));
+ c = (int*)malloc (N * sizeof (*c));
+ d = (int*)malloc (N * sizeof (*d));
+ e = (int*)malloc (N * sizeof (*e));
+ /* Case 1: parallel construct, async without an argument, bare wait.  */
+ for (int i = 0; i < N; ++i)
+ {
+ a[i] = 3;
+ b[i] = 0;
+ }
+
+#pragma acc data copy (a[0:N]) copy (b[0:N])
+ {
+
+#pragma acc parallel async
+#pragma acc loop
+ for (int i = 0; i < N; ++i)
+ b[i] = a[i];
+
+#pragma acc wait
+ }
+ /* Host-side check: b must equal a, and a must be unchanged.  */
+ for (int i = 0; i < N; ++i)
+ {
+ if (a[i] != 3)
+ abort ();
+ if (b[i] != 3)
+ abort ();
+ }
+ /* Case 2: same copy loop, using async (1) and wait (1).  */
+ for (int i = 0; i < N; ++i)
+ {
+ a[i] = 2;
+ b[i] = 0;
+ }
+
+#pragma acc data copy (a[0:N]) copy (b[0:N])
+ {
+#pragma acc parallel async (1)
+#pragma acc loop
+ for (int i = 0; i < N; ++i)
+ b[i] = a[i];
+
+#pragma acc wait (1)
+ }
+
+ for (int i = 0; i < N; ++i)
+ {
+ if (a[i] != 2) abort ();
+ if (b[i] != 2) abort ();
+ }
+ /* Case 3: three parallel constructs, all async (1), then wait (1).  */
+ for (int i = 0; i < N; ++i)
+ {
+ a[i] = 3;
+ b[i] = 0;
+ c[i] = 0;
+ d[i] = 0;
+ }
+
+#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N])
+ {
+
+#pragma acc parallel async (1)
+ for (int i = 0; i < N; ++i)
+ b[i] = (a[i] * a[i] * a[i]) / a[i];
+
+#pragma acc parallel async (1)
+ for (int i = 0; i < N; ++i)
+ c[i] = (a[i] * 4) / a[i];
+
+
+#pragma acc parallel async (1)
+#pragma acc loop
+ for (int i = 0; i < N; ++i)
+ d[i] = ((a[i] * a[i] + a[i]) / a[i]) - a[i];
+
+#pragma acc wait (1)
+ }
+ /* With a[i] == 3: b == 27 / 3 == 9, c == 12 / 3 == 4, d == 12 / 3 - 3 == 1.  */
+ for (int i = 0; i < N; ++i)
+ {
+ if (a[i] != 3)
+ abort ();
+ if (b[i] != 9)
+ abort ();
+ if (c[i] != 4)
+ abort ();
+ if (d[i] != 1)
+ abort ();
+ }
+ /* Case 4: as case 3, plus a fourth construct that sums a, b, c and d into e.  */
+ for (int i = 0; i < N; ++i)
+ {
+ a[i] = 2;
+ b[i] = 0;
+ c[i] = 0;
+ d[i] = 0;
+ e[i] = 0;
+ }
+
+#pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N])
+ {
+
+#pragma acc parallel async (1)
+ for (int i = 0; i < N; ++i)
+ b[i] = (a[i] * a[i] * a[i]) / a[i];
+
+#pragma acc parallel async (1)
+#pragma acc loop
+ for (int i = 0; i < N; ++i)
+ c[i] = (a[i] * 4) / a[i];
+
+#pragma acc parallel async (1)
+#pragma acc loop
+ for (int i = 0; i < N; ++i)
+ d[i] = ((a[i] * a[i] + a[i]) / a[i]) - a[i];
+
+ /* This construct reads b, c and d, which the three constructs above write.  */
+#pragma acc parallel wait (1) async (1)
+#pragma acc loop
+ for (int i = 0; i < N; ++i)
+ e[i] = a[i] + b[i] + c[i] + d[i];
+
+#pragma acc wait (1)
+ }
+ /* With a[i] == 2: b == 4, c == 4, d == 1, and e == 2 + 4 + 4 + 1 == 11.  */
+ for (int i = 0; i < N; ++i)
+ {
+ if (a[i] != 2)
+ abort ();
+ if (b[i] != 4)
+ abort ();
+ if (c[i] != 4)
+ abort ();
+ if (d[i] != 1)
+ abort ();
+ if (e[i] != 11)
+ abort ();
+ }
+ /* Cases 5-8 repeat cases 1-4 with kernels constructs in place of parallel.  */
+ for (int i = 0; i < N; ++i)
+ {
+ a[i] = 3;
+ b[i] = 0;
+ }
+
+#pragma acc data copy (a[0:N]) copy (b[0:N])
+ {
+
+#pragma acc kernels async
+#pragma acc loop
+ for (int i = 0; i < N; ++i)
+ b[i] = a[i];
+
+#pragma acc wait
+ }
+
+ for (int i = 0; i < N; ++i)
+ {
+ if (a[i] != 3)
+ abort ();
+ if (b[i] != 3)
+ abort ();
+ }
+ /* Case 6: kernels construct with async (1) and wait (1).  */
+ for (int i = 0; i < N; ++i)
+ {
+ a[i] = 2;
+ b[i] = 0;
+ }
+
+#pragma acc data copy (a[0:N]) copy (b[0:N])
+ {
+#pragma acc kernels async (1)
+#pragma acc loop
+ for (int i = 0; i < N; ++i)
+ b[i] = a[i];
+
+#pragma acc wait (1)
+ }
+
+ for (int i = 0; i < N; ++i)
+ {
+ if (a[i] != 2)
+ abort ();
+ if (b[i] != 2)
+ abort ();
+ }
+ /* Case 7: three kernels constructs, all async (1), then wait (1).  */
+ for (int i = 0; i < N; ++i)
+ {
+ a[i] = 3;
+ b[i] = 0;
+ c[i] = 0;
+ d[i] = 0;
+ }
+
+#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N])
+ {
+#pragma acc kernels async (1)
+ for (int i = 0; i < N; ++i)
+ b[i] = (a[i] * a[i] * a[i]) / a[i];
+
+#pragma acc kernels async (1)
+ for (int i = 0; i < N; ++i)
+ c[i] = (a[i] * 4) / a[i];
+
+#pragma acc kernels async (1)
+#pragma acc loop
+ for (int i = 0; i < N; ++i)
+ d[i] = ((a[i] * a[i] + a[i]) / a[i]) - a[i];
+
+#pragma acc wait (1)
+ }
+
+ for (int i = 0; i < N; ++i)
+ {
+ if (a[i] != 3)
+ abort ();
+ if (b[i] != 9)
+ abort ();
+ if (c[i] != 4)
+ abort ();
+ if (d[i] != 1)
+ abort ();
+ }
+ /* Case 8: as case 7, plus a fourth construct that sums a, b, c and d into e.  */
+ for (int i = 0; i < N; ++i)
+ {
+ a[i] = 2;
+ b[i] = 0;
+ c[i] = 0;
+ d[i] = 0;
+ e[i] = 0;
+ }
+
+#pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N])
+ {
+#pragma acc kernels async (1)
+ for (int i = 0; i < N; ++i)
+ b[i] = (a[i] * a[i] * a[i]) / a[i];
+
+#pragma acc kernels async (1)
+#pragma acc loop
+ for (int i = 0; i < N; ++i)
+ c[i] = (a[i] * 4) / a[i];
+
+#pragma acc kernels async (1)
+#pragma acc loop
+ for (int i = 0; i < N; ++i)
+ d[i] = ((a[i] * a[i] + a[i]) / a[i]) - a[i];
+
+#pragma acc kernels wait (1) async (1)
+#pragma acc loop
+ for (int i = 0; i < N; ++i)
+ e[i] = a[i] + b[i] + c[i] + d[i];
+
+#pragma acc wait (1)
+ }
+
+ for (int i = 0; i < N; ++i)
+ {
+ if (a[i] != 2)
+ abort ();
+ if (b[i] != 4)
+ abort ();
+ if (c[i] != 4)
+ abort ();
+ if (d[i] != 1)
+ abort ();
+ if (e[i] != 11)
+ abort ();
+ }
+
+ free (a);
+ free (b);
+ free (c);
+ free (d);
+ free (e);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/f-asyncwait-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/f-asyncwait-2.c
new file mode 100644
index 0000000..5298e4c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/f-asyncwait-2.c
@@ -0,0 +1,61 @@
+/* { dg-do run } */
+
+/* Based on asyncwait-2.f90. */
+
+#include <stdlib.h>
+
+#define N 64
+
+int *a, *b, *c;
+/* Check a wait clause that lists two async arguments, wait (0, 1).  */
+int
+main (void)
+{
+ a = (int *)malloc (N * sizeof (*a));
+ b = (int *)malloc (N * sizeof (*b));
+ c = (int *)malloc (N * sizeof (*c));
+ /* a and b are filled by two constructs with different async arguments.  */
+#pragma acc parallel copy (a[0:N]) async (0)
+#pragma acc loop
+ for (int i = 0; i < N; ++i)
+ a[i] = 1;
+
+#pragma acc parallel copy (b[0:N]) async (1)
+#pragma acc loop
+ for (int i = 0; i < N; ++i)
+ b[i] = 1;
+ /* The wait clause below names both async arguments used above.  */
+#pragma acc parallel copy (a[0:N], b[0:N], c[0:N]) wait (0, 1)
+#pragma acc loop
+ for (int i = 0; i < N; ++i)
+ c[i] = a[i] + b[i];
+ /* Each c[i] must be 1 + 1.  */
+ for (int i = 0; i < N; ++i)
+ if (c[i] != 2)
+ abort ();
+ /* Same sequence with kernels constructs; a, b and c are not reset first.  */
+#if 1
+#pragma acc kernels copy (a[0:N]) async (0)
+#pragma acc loop
+ for (int i = 0; i < N; ++i)
+ a[i] = 1;
+
+#pragma acc kernels copy (b[0:N]) async (1)
+#pragma acc loop
+ for (int i = 0; i < N; ++i)
+ b[i] = 1;
+
+#pragma acc kernels copy (a[0:N], b[0:N], c[0:N]) wait (0, 1)
+#pragma acc loop
+ for (int i = 0; i < N; ++i)
+ c[i] = a[i] + b[i];
+
+ for (int i = 0; i < N; ++i)
+ if (c[i] != 2)
+ abort ();
+#endif
+
+ free (a);
+ free (b);
+ free (c);
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/f-asyncwait-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/f-asyncwait-3.c
new file mode 100644
index 0000000..319eea6
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/f-asyncwait-3.c
@@ -0,0 +1,63 @@
+/* { dg-do run } */
+
+/* Based on asyncwait-3.f90. */
+
+#include <stdlib.h>
+
+#define N 64
+/* Check a standalone wait directive that lists two async arguments.  */
+int
+main (void)
+{
+ int *a, *b, *c;
+ /* Three host arrays of N ints each; the malloc results are not checked.  */
+ a = (int *)malloc (N * sizeof (*a));
+ b = (int *)malloc (N * sizeof (*b));
+ c = (int *)malloc (N * sizeof (*c));
+ /* a and b are filled by two constructs with different async arguments.  */
+#pragma acc parallel copy (a[0:N]) async (0)
+#pragma acc loop
+ for (int i = 0; i < N; ++i)
+ a[i] = 1;
+
+#pragma acc parallel copy (b[0:N]) async (1)
+#pragma acc loop
+ for (int i = 0; i < N; ++i)
+ b[i] = 1;
+ /* The wait directive below names both async arguments used above.  */
+#pragma acc wait (0, 1)
+
+#pragma acc parallel copy (a[0:N], b[0:N], c[0:N])
+#pragma acc loop
+ for (int i = 0; i < N; ++i)
+ c[i] = a[i] + b[i];
+ /* Each c[i] must be 1 + 1.  */
+ for (int i = 0; i < N; ++i)
+ if (c[i] != 2)
+ abort ();
+ /* Same sequence with kernels constructs; a, b and c are not reset first.  */
+#pragma acc kernels copy (a[0:N]) async (0)
+#pragma acc loop
+ for (int i = 0; i < N; ++i)
+ a[i] = 1;
+
+#pragma acc kernels copy (b[0:N]) async (1)
+#pragma acc loop
+ for (int i = 0; i < N; ++i)
+ b[i] = 1;
+
+#pragma acc wait (0, 1)
+
+#pragma acc kernels copy (a[0:N], b[0:N], c[0:N])
+#pragma acc loop
+ for (int i = 0; i < N; ++i)
+ c[i] = a[i] + b[i];
+
+ for (int i = 0; i < N; ++i)
+ if (c[i] != 2)
+ abort ();
+
+ free (a);
+ free (b);
+ free (c);
+}