Hi! This patch fixes worksharing loops containing both conditional lastprivate and inscan reduction(s). Furthermore, for a nowait omp for, it ensures that the GOMP_loop_end_nowait call is emitted at the end, after the second loop in a scan, and not after the first one.
Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk. 2019-07-04 Jakub Jelinek <ja...@redhat.com> * omp-expand.c (expand_omp_for_static_nochunk): Don't emit GOMP_loop_start at the start of second worksharing loop in a scan. For nowait, don't emit GOMP_loop_end_nowait at the end of first worksharing loop in a scan even if there are conditional lastprivates, and do emit GOMP_loop_end_nowait at the end of second worksharing loop. * testsuite/libgomp.c/scan-9.c: New test. * testsuite/libgomp.c/scan-10.c: New test. --- gcc/omp-expand.c.jj 2019-07-03 07:02:16.511989061 +0200 +++ gcc/omp-expand.c 2019-07-04 11:54:22.576366802 +0200 @@ -3744,7 +3744,9 @@ expand_omp_for_static_nochunk (struct om cond_var = OMP_CLAUSE_DECL (c); } if (fd->have_reductemp - || fd->have_pointer_condtemp + /* For scan, we don't want to reinitialize condtemp before the + second loop. */ + || (fd->have_pointer_condtemp && !fd->have_scantemp) || fd->have_nonctrl_scantemp) { tree t1 = build_int_cst (long_integer_type_node, 0); @@ -4235,7 +4237,8 @@ expand_omp_for_static_nochunk (struct om else gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT); } - else if (fd->have_pointer_condtemp) + else if ((fd->have_pointer_condtemp || fd->have_scantemp) + && !fd->have_nonctrl_scantemp) { tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT); gcall *g = gimple_build_call (fn, 0); --- libgomp/testsuite/libgomp.c/scan-9.c.jj 2019-07-04 12:04:48.235360731 +0200 +++ libgomp/testsuite/libgomp.c/scan-9.c 2019-07-04 11:08:54.038069390 +0200 @@ -0,0 +1,116 @@ +/* { dg-require-effective-target size32plus } */ + +extern void abort (void); +int r, a[1024], b[1024], x, y, z; + +__attribute__((noipa)) void +foo (int *a, int *b) +{ + #pragma omp for reduction (inscan, +:r) lastprivate (conditional: z) firstprivate (x) private (y) + for (int i = 0; i < 1024; i++) + { + { y = a[i]; r += y + x + 12; } + #pragma omp scan inclusive(r) + { b[i] = r; if ((i & 1) == 0 && i < 937) z = r; } + } 
+} + +__attribute__((noipa)) int +bar (void) +{ + int s = 0; + #pragma omp parallel + #pragma omp for reduction (inscan, +:s) firstprivate (x) private (y) lastprivate (z) + for (int i = 0; i < 1024; i++) + { + { y = 2 * a[i]; s += y; z = y; } + #pragma omp scan inclusive(s) + { y = s; b[i] = y + x + 12; } + } + return s; +} + +__attribute__((noipa)) void +baz (int *a, int *b) +{ + #pragma omp parallel for reduction (inscan, +:r) firstprivate (x) lastprivate (x) + for (int i = 0; i < 1024; i++) + { + { r += a[i] + x + 12; if (i == 1023) x = 29; } + #pragma omp scan inclusive(r) + b[i] = r; + } +} + +__attribute__((noipa)) int +qux (void) +{ + int s = 0; + #pragma omp parallel for reduction (inscan, +:s) lastprivate (conditional: x, y) + for (int i = 0; i < 1024; i++) + { + { s += 2 * a[i]; if ((a[i] & 1) == 1 && i < 825) x = a[i]; } + #pragma omp scan inclusive(s) + { b[i] = s; if ((a[i] & 1) == 0 && i < 829) y = a[i]; } + } + return s; +} + +int +main () +{ + int s = 0; + x = -12; + for (int i = 0; i < 1024; ++i) + { + a[i] = i; + b[i] = -1; + asm ("" : "+g" (i)); + } + #pragma omp parallel + foo (a, b); + if (r != 1024 * 1023 / 2 || x != -12 || z != b[936]) + abort (); + for (int i = 0; i < 1024; ++i) + { + s += i; + if (b[i] != s) + abort (); + else + b[i] = 25; + } + if (bar () != 1024 * 1023 || x != -12 || z != 2 * 1023) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + s += 2 * i; + if (b[i] != s) + abort (); + else + b[i] = -1; + } + r = 0; + baz (a, b); + if (r != 1024 * 1023 / 2 || x != 29) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + s += i; + if (b[i] != s) + abort (); + else + b[i] = -25; + } + if (qux () != 1024 * 1023 || x != 823 || y != 828) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + s += 2 * i; + if (b[i] != s) + abort (); + } + return 0; +} --- libgomp/testsuite/libgomp.c/scan-10.c.jj 2019-07-04 12:09:44.171630459 +0200 +++ libgomp/testsuite/libgomp.c/scan-10.c 2019-07-04 12:11:12.169224358 +0200 @@ -0,0 
+1,116 @@ +/* { dg-require-effective-target size32plus } */ + +extern void abort (void); +int r, a[1024], b[1024], x, y, z; + +__attribute__((noipa)) void +foo (int *a, int *b) +{ + #pragma omp for reduction (inscan, +:r) lastprivate (conditional: z) firstprivate (x) private (y) + for (int i = 0; i < 1024; i++) + { + { b[i] = r; if ((i & 1) == 0 && i < 937) z = r; } + #pragma omp scan exclusive(r) + { y = a[i]; r += y + x + 12; } + } +} + +__attribute__((noipa)) int +bar (void) +{ + int s = 0; + #pragma omp parallel + #pragma omp for reduction (inscan, +:s) firstprivate (x) private (y) lastprivate (z) + for (int i = 0; i < 1024; i++) + { + { y = s; b[i] = y + x + 12; } + #pragma omp scan exclusive(s) + { y = 2 * a[i]; s += y; z = y; } + } + return s; +} + +__attribute__((noipa)) void +baz (int *a, int *b) +{ + #pragma omp parallel for reduction (inscan, +:r) firstprivate (x) lastprivate (x) + for (int i = 0; i < 1024; i++) + { + b[i] = r; + #pragma omp scan exclusive(r) + { r += a[i] + x + 12; if (i == 1023) x = 29; } + } +} + +__attribute__((noipa)) int +qux (void) +{ + int s = 0; + #pragma omp parallel for reduction (inscan, +:s) lastprivate (conditional: x, y) + for (int i = 0; i < 1024; i++) + { + { b[i] = s; if ((a[i] & 1) == 0 && i < 829) y = a[i]; } + #pragma omp scan exclusive(s) + { s += 2 * a[i]; if ((a[i] & 1) == 1 && i < 825) x = a[i]; } + } + return s; +} + +int +main () +{ + int s = 0; + x = -12; + for (int i = 0; i < 1024; ++i) + { + a[i] = i; + b[i] = -1; + asm ("" : "+g" (i)); + } + #pragma omp parallel + foo (a, b); + if (r != 1024 * 1023 / 2 || x != -12 || z != b[936]) + abort (); + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + else + b[i] = 25; + s += i; + } + if (bar () != 1024 * 1023 || x != -12 || z != 2 * 1023) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + else + b[i] = -1; + s += 2 * i; + } + r = 0; + baz (a, b); + if (r != 1024 * 1023 / 2 || x != 29) + abort (); + s = 0; + for 
(int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + else + b[i] = -25; + s += i; + } + if (qux () != 1024 * 1023 || x != 823 || y != 828) + abort (); + s = 0; + for (int i = 0; i < 1024; ++i) + { + if (b[i] != s) + abort (); + s += 2 * i; + } + return 0; +} Jakub