Hi! This fixes the expansion of #pragma omp ordered threads simd. In that case we want GOMP_ordered_start () / GOMP_ordered_end () calls around the block, but those calls really should be made just once, while the other code in between the GOMP_SIMD_ORDERED_{START,END} internal calls should be expanded in a loop from 0 to vf-1, with the iterator and linear vars being adjusted there. Therefore, I'm emitting those calls neither in between the internal calls (that region is what eventually should become a loop) nor outside of them (because everything there should be vectorized). Instead, whether they are needed is passed as an argument to the internal calls (for now this makes little difference, as the vectorizer always gives up on this, but we should teach it to handle that case eventually).
Regtested on x86_64-linux, committed to gomp-4_5-branch. 2015-11-13 Jakub Jelinek <ja...@redhat.com> * omp-low.c (lower_omp_ordered): Add argument to GOMP_SIMD_ORDERED_* internal calls - 0 if ordered simd and 1 for ordered threads simd. * tree-vectorizer.c (adjust_simduid_builtins): If GOMP_SIMD_ORDERED_* argument is 1, replace it with GOMP_ordered_* call instead of removing it. * testsuite/libgomp.c/ordered-5.c: New test. --- gcc/omp-low.c.jj 2015-11-09 11:17:31.000000000 +0100 +++ gcc/omp-low.c 2015-11-13 17:20:18.701832932 +0100 @@ -13924,8 +13924,10 @@ lower_omp_ordered (gimple_stmt_iterator gomp_ordered *ord_stmt = as_a <gomp_ordered *> (stmt); gcall *x; gbind *bind; - bool simd - = find_omp_clause (gimple_omp_ordered_clauses (ord_stmt), OMP_CLAUSE_SIMD); + bool simd = find_omp_clause (gimple_omp_ordered_clauses (ord_stmt), + OMP_CLAUSE_SIMD); + bool threads = find_omp_clause (gimple_omp_ordered_clauses (ord_stmt), + OMP_CLAUSE_THREADS); if (find_omp_clause (gimple_omp_ordered_clauses (ord_stmt), OMP_CLAUSE_DEPEND)) @@ -13948,7 +13950,8 @@ lower_omp_ordered (gimple_stmt_iterator if (simd) { - x = gimple_build_call_internal (IFN_GOMP_SIMD_ORDERED_START, 0); + x = gimple_build_call_internal (IFN_GOMP_SIMD_ORDERED_START, 1, + build_int_cst (NULL_TREE, threads)); cfun->has_simduid_loops = true; } else @@ -13962,7 +13965,8 @@ lower_omp_ordered (gimple_stmt_iterator gimple_omp_set_body (stmt, NULL); if (simd) - x = gimple_build_call_internal (IFN_GOMP_SIMD_ORDERED_END, 0); + x = gimple_build_call_internal (IFN_GOMP_SIMD_ORDERED_END, 1, + build_int_cst (NULL_TREE, threads)); else x = gimple_build_call (builtin_decl_explicit (BUILT_IN_GOMP_ORDERED_END), 0); --- gcc/tree-vectorizer.c.jj 2015-11-09 11:17:56.000000000 +0100 +++ gcc/tree-vectorizer.c 2015-11-13 17:51:32.793269963 +0100 @@ -177,6 +177,21 @@ adjust_simduid_builtins (hash_table<simd break; case IFN_GOMP_SIMD_ORDERED_START: case IFN_GOMP_SIMD_ORDERED_END: + if (integer_onep (gimple_call_arg (stmt, 0))) + { + 
enum built_in_function bcode + = (ifn == IFN_GOMP_SIMD_ORDERED_START + ? BUILT_IN_GOMP_ORDERED_START + : BUILT_IN_GOMP_ORDERED_END); + gimple *g + = gimple_build_call (builtin_decl_explicit (bcode), 0); + tree vdef = gimple_vdef (stmt); + gimple_set_vdef (g, vdef); + SSA_NAME_DEF_STMT (vdef) = g; + gimple_set_vuse (g, gimple_vuse (stmt)); + gsi_replace (&i, g, true); + continue; + } gsi_remove (&i, true); unlink_stmt_vdef (stmt); continue; --- libgomp/testsuite/libgomp.c/ordered-5.c.jj 2015-11-13 17:56:26.182110077 +0100 +++ libgomp/testsuite/libgomp.c/ordered-5.c 2015-11-13 17:56:51.266753981 +0100 @@ -0,0 +1,30 @@ +/* { dg-do run } */ +/* { dg-additional-options "-msse2" { target sse2_runtime } } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +extern void abort (void); +int a[1024], b = -1; + +int +main () +{ + int i; + #pragma omp parallel for simd ordered + for (i = 0; i < 1024; i++) + { + a[i] = i; + #pragma omp ordered threads simd + { + if (b + 1 != i) + abort (); + b = i; + } + a[i] += 3; + } + if (b != 1023) + abort (); + for (i = 0; i < 1024; i++) + if (a[i] != i + 3) + abort (); + return 0; +} Jakub