Hi! The public draft required the low bound of reduction array sections to be zero (although it was unclear whether that had to hold at compile time or only at runtime). After discussion, the restriction was removed entirely, so the low bound can now be arbitrary.
The following patch implements that. Committed to gomp-4_5-branch after retesting on x86_64-linux.

2015-10-16  Jakub Jelinek  <ja...@redhat.com>

	* gimplify.c (gimplify_scan_omp_clauses): Gimplify variable
	low-bound for array reduction. Look through POINTER_PLUS_EXPR
	when looking for ADDR_EXPR for array section reductions.
	* omp-low.c (scan_sharing_clauses): Look through POINTER_PLUS_EXPR
	for array section reductions.
	(lower_send_clauses): Likewise.
	(lower_rec_input_clauses): Handle non-zero low-bound on array
	section reductions.
	(lower_reduction_clauses): Likewise.
gcc/c/
	* c-typeck.c (handle_omp_array_sections_1): Allow non-zero low-bound
	on OMP_CLAUSE_REDUCTION array sections.
	(handle_omp_array_sections): Encode low-bound into the MEM_REF,
	either into the constant offset, or for variable low-bound using
	POINTER_PLUS_EXPR.
	(c_finish_omp_clauses): Look through POINTER_PLUS_EXPR for array
	section reductions.
gcc/cp/
	* semantics.c (handle_omp_array_sections_1): Allow non-zero low-bound
	on OMP_CLAUSE_REDUCTION array sections.
	(handle_omp_array_sections): Encode low-bound into the MEM_REF,
	either into the constant offset, or for variable low-bound using
	POINTER_PLUS_EXPR.
	(finish_omp_clauses): Look through POINTER_PLUS_EXPR for array
	section reductions.
gcc/testsuite/
	* c-c++-common/gomp/reduction-1.c (foo): Don't expect diagnostics
	on non-zero low-bound in reduction array sections. Add further
	tests.
libgomp/
	* testsuite/libgomp.c/reduction-11.c: New test.
	* testsuite/libgomp.c/reduction-12.c: New test.
	* testsuite/libgomp.c/reduction-13.c: New test.
	* testsuite/libgomp.c/reduction-14.c: New test.
	* testsuite/libgomp.c/reduction-15.c: New test.
	* testsuite/libgomp.c++/reduction-11.C: New test.
	* testsuite/libgomp.c++/reduction-12.C: New test.

--- gcc/gimplify.c.jj 2015-10-14 10:25:43.000000000 +0200 +++ gcc/gimplify.c 2015-10-16 14:39:39.841597858 +0200 @@ -6326,6 +6326,23 @@ gimplify_scan_omp_clauses (tree *list_p, omp_notice_variable (ctx, v, true); } decl = TREE_OPERAND (decl, 0); + if (TREE_CODE (decl) == POINTER_PLUS_EXPR) + { + if (gimplify_expr (&TREE_OPERAND (decl, 1), pre_p, + NULL, is_gimple_val, fb_rvalue) + == GS_ERROR) + { + remove = true; + break; + } + v = TREE_OPERAND (decl, 1); + if (DECL_P (v)) + { + omp_firstprivatize_variable (ctx, v); + omp_notice_variable (ctx, v, true); + } + decl = TREE_OPERAND (decl, 0); + } if (TREE_CODE (decl) == ADDR_EXPR || TREE_CODE (decl) == INDIRECT_REF) decl = TREE_OPERAND (decl, 0); @@ -6925,7 +6942,12 @@ gimplify_scan_omp_clauses (tree *list_p, || decl == OMP_CLAUSE_DECL (c) || (TREE_CODE (OMP_CLAUSE_DECL (c)) == MEM_REF && (TREE_CODE (TREE_OPERAND (OMP_CLAUSE_DECL (c), 0)) - == ADDR_EXPR))) + == ADDR_EXPR + || (TREE_CODE (TREE_OPERAND (OMP_CLAUSE_DECL (c), 0)) + == POINTER_PLUS_EXPR + && (TREE_CODE (TREE_OPERAND (TREE_OPERAND + (OMP_CLAUSE_DECL (c), 0), 0)) + == ADDR_EXPR))))) && omp_check_private (ctx, decl, false)) { error ("%s variable %qE is private in outer context", --- gcc/omp-low.c.jj 2015-10-14 18:04:13.000000000 +0200 +++ gcc/omp-low.c 2015-10-16 16:35:43.162945500 +0200 @@ -1919,6 +1919,8 @@ scan_sharing_clauses (tree clauses, omp_ && TREE_CODE (decl) == MEM_REF) { tree t = TREE_OPERAND (decl, 0); + if (TREE_CODE (t) == POINTER_PLUS_EXPR) + t = TREE_OPERAND (t, 0); if (TREE_CODE (t) == INDIRECT_REF || TREE_CODE (t) == ADDR_EXPR) t = TREE_OPERAND (t, 0); @@ -4247,6 +4249,8 @@ lower_rec_input_clauses (tree clauses, g if (c_kind == OMP_CLAUSE_REDUCTION && TREE_CODE (var) == MEM_REF) { var = TREE_OPERAND (var, 0); + if (TREE_CODE (var) == POINTER_PLUS_EXPR) + var = TREE_OPERAND (var, 0); if (TREE_CODE (var) == INDIRECT_REF || TREE_CODE (var) == ADDR_EXPR) var = TREE_OPERAND (var, 0); @@ -4275,7 +4279,28 @@ lower_rec_input_clauses (tree clauses, 
g if (pass == 0) continue; + tree bias = TREE_OPERAND (OMP_CLAUSE_DECL (c), 1); tree orig_var = TREE_OPERAND (OMP_CLAUSE_DECL (c), 0); + if (TREE_CODE (orig_var) == POINTER_PLUS_EXPR) + { + tree b = TREE_OPERAND (orig_var, 1); + b = maybe_lookup_decl (b, ctx); + if (b == NULL) + { + b = TREE_OPERAND (orig_var, 1); + b = maybe_lookup_decl_in_outer_ctx (b, ctx); + } + if (integer_zerop (bias)) + bias = b; + else + { + bias = fold_convert_loc (clause_loc, + TREE_TYPE (b), bias); + bias = fold_build2_loc (clause_loc, PLUS_EXPR, + TREE_TYPE (b), b, bias); + } + orig_var = TREE_OPERAND (orig_var, 0); + } if (TREE_CODE (orig_var) == INDIRECT_REF || TREE_CODE (orig_var) == ADDR_EXPR) orig_var = TREE_OPERAND (orig_var, 0); @@ -4316,7 +4341,24 @@ lower_rec_input_clauses (tree clauses, g tree y = create_tmp_var (ptype, name); gimplify_assign (y, x, ilist); x = y; - if (TREE_CODE (TREE_OPERAND (d, 0)) == ADDR_EXPR) + tree yb = y; + + if (!integer_zerop (bias)) + { + bias = fold_convert_loc (clause_loc, sizetype, bias); + bias = fold_build1_loc (clause_loc, NEGATE_EXPR, + sizetype, bias); + x = fold_build2_loc (clause_loc, POINTER_PLUS_EXPR, + TREE_TYPE (x), x, bias); + yb = create_tmp_var (ptype, name); + gimplify_assign (yb, x, ilist); + x = yb; + } + + d = TREE_OPERAND (d, 0); + if (TREE_CODE (d) == POINTER_PLUS_EXPR) + d = TREE_OPERAND (d, 0); + if (TREE_CODE (d) == ADDR_EXPR) { if (orig_var != var) { @@ -4342,11 +4384,11 @@ lower_rec_input_clauses (tree clauses, g else { gcc_assert (orig_var == var); - if (TREE_CODE (TREE_OPERAND (d, 0)) == INDIRECT_REF) + if (TREE_CODE (d) == INDIRECT_REF) { x = create_tmp_var (ptype, name); TREE_ADDRESSABLE (x) = 1; - gimplify_assign (x, y, ilist); + gimplify_assign (x, yb, ilist); x = build_fold_addr_expr_loc (clause_loc, x); } x = fold_convert_loc (clause_loc, TREE_TYPE (new_var), x); @@ -4363,9 +4405,9 @@ lower_rec_input_clauses (tree clauses, g gimplify_assign (y2, y, ilist); tree ref = build_outer_var_ref (var, ctx); /* For ref 
build_outer_var_ref already performs this. */ - if (TREE_CODE (TREE_OPERAND (d, 0)) == INDIRECT_REF) + if (TREE_CODE (d) == INDIRECT_REF) gcc_assert (is_reference (var)); - else if (TREE_CODE (TREE_OPERAND (d, 0)) == ADDR_EXPR) + else if (TREE_CODE (d) == ADDR_EXPR) ref = build_fold_addr_expr (ref); else if (is_reference (var)) ref = build_fold_addr_expr (ref); @@ -5338,6 +5380,8 @@ lower_reduction_clauses (tree clauses, g if (TREE_CODE (var) == MEM_REF) { var = TREE_OPERAND (var, 0); + if (TREE_CODE (var) == POINTER_PLUS_EXPR) + var = TREE_OPERAND (var, 0); if (TREE_CODE (var) == INDIRECT_REF || TREE_CODE (var) == ADDR_EXPR) var = TREE_OPERAND (var, 0); @@ -5386,14 +5430,35 @@ lower_reduction_clauses (tree clauses, g tree v = TYPE_MAX_VALUE (TYPE_DOMAIN (type)); tree i = create_tmp_var (TREE_TYPE (v), NULL); tree ptype = build_pointer_type (TREE_TYPE (type)); + tree bias = TREE_OPERAND (d, 1); + d = TREE_OPERAND (d, 0); + if (TREE_CODE (d) == POINTER_PLUS_EXPR) + { + tree b = TREE_OPERAND (d, 1); + b = maybe_lookup_decl (b, ctx); + if (b == NULL) + { + b = TREE_OPERAND (d, 1); + b = maybe_lookup_decl_in_outer_ctx (b, ctx); + } + if (integer_zerop (bias)) + bias = b; + else + { + bias = fold_convert_loc (clause_loc, TREE_TYPE (b), bias); + bias = fold_build2_loc (clause_loc, PLUS_EXPR, + TREE_TYPE (b), b, bias); + } + d = TREE_OPERAND (d, 0); + } /* For ref build_outer_var_ref already performs this, so only new_var needs a dereference. 
*/ - if (TREE_CODE (TREE_OPERAND (d, 0)) == INDIRECT_REF) + if (TREE_CODE (d) == INDIRECT_REF) { new_var = build_simple_mem_ref_loc (clause_loc, new_var); gcc_assert (is_reference (var) && var == orig_var); } - else if (TREE_CODE (TREE_OPERAND (d, 0)) == ADDR_EXPR) + else if (TREE_CODE (d) == ADDR_EXPR) { if (orig_var == var) { @@ -5416,6 +5481,15 @@ lower_reduction_clauses (tree clauses, g v = maybe_lookup_decl_in_outer_ctx (v, ctx); gimplify_expr (&v, stmt_seqp, NULL, is_gimple_val, fb_rvalue); } + if (!integer_zerop (bias)) + { + bias = fold_convert_loc (clause_loc, sizetype, bias); + new_var = fold_build2_loc (clause_loc, POINTER_PLUS_EXPR, + TREE_TYPE (new_var), new_var, + unshare_expr (bias)); + ref = fold_build2_loc (clause_loc, POINTER_PLUS_EXPR, + TREE_TYPE (ref), ref, bias); + } new_var = fold_convert_loc (clause_loc, ptype, new_var); ref = fold_convert_loc (clause_loc, ptype, ref); tree m = create_tmp_var (ptype, NULL); @@ -5608,6 +5682,8 @@ lower_send_clauses (tree clauses, gimple && TREE_CODE (val) == MEM_REF) { val = TREE_OPERAND (val, 0); + if (TREE_CODE (val) == POINTER_PLUS_EXPR) + val = TREE_OPERAND (val, 0); if (TREE_CODE (val) == INDIRECT_REF || TREE_CODE (val) == ADDR_EXPR) val = TREE_OPERAND (val, 0); --- gcc/c/c-typeck.c.jj 2015-10-15 18:17:41.000000000 +0200 +++ gcc/c/c-typeck.c 2015-10-16 09:38:33.935389555 +0200 @@ -11779,13 +11779,6 @@ handle_omp_array_sections_1 (tree c, tre && (TREE_CODE (length) != INTEGER_CST || integer_onep (length))) first_non_one++; } - if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION - && !integer_zerop (low_bound)) - { - error_at (OMP_CLAUSE_LOCATION (c), - "%<reduction%> array section has to be zero-based"); - return error_mark_node; - } if (TREE_CODE (type) == ARRAY_TYPE) { if (length == NULL_TREE @@ -12126,7 +12119,24 @@ handle_omp_array_sections (tree c, bool tree ptype = build_pointer_type (eltype); if (TREE_CODE (TREE_TYPE (t)) == ARRAY_TYPE) t = build_fold_addr_expr (t); - t = build2 (MEM_REF, type, t, 
build_int_cst (ptype, 0)); + tree t2 = build_fold_addr_expr (first); + t2 = fold_convert_loc (OMP_CLAUSE_LOCATION (c), + ptrdiff_type_node, t2); + t2 = fold_build2_loc (OMP_CLAUSE_LOCATION (c), MINUS_EXPR, + ptrdiff_type_node, t2, + fold_convert_loc (OMP_CLAUSE_LOCATION (c), + ptrdiff_type_node, t)); + t2 = c_fully_fold (t2, false, NULL); + if (tree_fits_shwi_p (t2)) + t = build2 (MEM_REF, type, t, + build_int_cst (ptype, tree_to_shwi (t2))); + else + { + t2 = fold_convert_loc (OMP_CLAUSE_LOCATION (c), sizetype, t2); + t = build2_loc (OMP_CLAUSE_LOCATION (c), POINTER_PLUS_EXPR, + TREE_TYPE (t), t, t2); + t = build2 (MEM_REF, type, t, build_int_cst (ptype, 0)); + } OMP_CLAUSE_DECL (c) = t; return false; } @@ -12466,6 +12476,8 @@ c_finish_omp_clauses (tree clauses, bool break; } t = TREE_OPERAND (t, 0); + if (TREE_CODE (t) == POINTER_PLUS_EXPR) + t = TREE_OPERAND (t, 0); if (TREE_CODE (t) == ADDR_EXPR) t = TREE_OPERAND (t, 0); } --- gcc/cp/semantics.c.jj 2015-10-15 18:32:24.000000000 +0200 +++ gcc/cp/semantics.c 2015-10-16 17:08:35.278026456 +0200 @@ -4519,13 +4519,6 @@ handle_omp_array_sections_1 (tree c, tre && (TREE_CODE (length) != INTEGER_CST || integer_onep (length))) first_non_one++; } - if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION - && !integer_zerop (low_bound)) - { - error_at (OMP_CLAUSE_LOCATION (c), - "%<reduction%> array section has to be zero-based"); - return error_mark_node; - } if (TREE_CODE (type) == ARRAY_TYPE) { if (length == NULL_TREE @@ -4866,7 +4859,24 @@ handle_omp_array_sections (tree c, bool t = convert_from_reference (t); else if (TREE_CODE (TREE_TYPE (t)) == ARRAY_TYPE) t = build_fold_addr_expr (t); - t = build2 (MEM_REF, type, t, build_int_cst (ptype, 0)); + tree t2 = build_fold_addr_expr (first); + t2 = fold_convert_loc (OMP_CLAUSE_LOCATION (c), + ptrdiff_type_node, t2); + t2 = fold_build2_loc (OMP_CLAUSE_LOCATION (c), MINUS_EXPR, + ptrdiff_type_node, t2, + fold_convert_loc (OMP_CLAUSE_LOCATION (c), + ptrdiff_type_node, t)); + if 
(tree_fits_shwi_p (t2)) + t = build2 (MEM_REF, type, t, + build_int_cst (ptype, tree_to_shwi (t2))); + else + { + t2 = fold_convert_loc (OMP_CLAUSE_LOCATION (c), + sizetype, t2); + t = build2_loc (OMP_CLAUSE_LOCATION (c), POINTER_PLUS_EXPR, + TREE_TYPE (t), t, t2); + t = build2 (MEM_REF, type, t, build_int_cst (ptype, 0)); + } OMP_CLAUSE_DECL (c) = t; return false; } @@ -5694,6 +5704,8 @@ finish_omp_clauses (tree clauses, bool a { gcc_assert (TREE_CODE (t) == MEM_REF); t = TREE_OPERAND (t, 0); + if (TREE_CODE (t) == POINTER_PLUS_EXPR) + t = TREE_OPERAND (t, 0); if (TREE_CODE (t) == ADDR_EXPR || TREE_CODE (t) == INDIRECT_REF) t = TREE_OPERAND (t, 0); --- gcc/testsuite/c-c++-common/gomp/reduction-1.c.jj 2015-10-14 10:25:30.000000000 +0200 +++ gcc/testsuite/c-c++-common/gomp/reduction-1.c 2015-10-16 14:51:06.709543427 +0200 @@ -26,9 +26,17 @@ foo (int a[10][10][10], int **b, int x) bar (a); #pragma omp parallel reduction(+: a[0:4]) bar (a); - #pragma omp parallel reduction(+: a[2:4]) /* { dg-error "array section has to be zero-based" } */ + #pragma omp parallel reduction(+: a[2:4]) bar (a); - #pragma omp parallel reduction(+: e[2:4]) /* { dg-error "array section has to be zero-based" } */ + #pragma omp parallel reduction(+: e[2:4]) + bar (a); + #pragma omp parallel reduction(+: a[x:4]) + bar (a); + #pragma omp parallel reduction(+: e[x:4]) + bar (a); + #pragma omp parallel reduction(+: a[x:x]) + bar (a); + #pragma omp parallel reduction(+: e[x:x]) bar (a); #pragma omp parallel reduction(+: a[0.5:2]) /* { dg-error "low bound \[^\n\r]* of array section does not have integral type" } */ bar (a); --- libgomp/testsuite/libgomp.c/reduction-11.c.jj 2015-10-16 14:57:59.000502905 +0200 +++ libgomp/testsuite/libgomp.c/reduction-11.c 2015-10-16 16:37:35.162308552 +0200 @@ -0,0 +1,60 @@ +char z[10] = { 0 }; + +__attribute__((noinline, noclone)) void +foo (int (*x)[3][2], int *y, long w[1][2], int s, int t) +{ + unsigned long long a[9] = {}; + short b[5] = {}; + int i; + #pragma 
omp parallel for reduction(+:x[-1:2][:][0:2], z[t + 2:4]) \ + reduction(*:y[-s:3]) reduction(|:a[s + 3:4]) \ + reduction(&:w[s + 1:1][t:2]) reduction(max:b[2:]) + for (i = 0; i < 128; i++) + { + x[i / 64 - 1][i % 3][(i / 4) & 1] += i; + if ((i & 15) == 1) + y[1] *= 3; + if ((i & 31) == 2) + y[2] *= 7; + if ((i & 63) == 3) + y[3] *= 17; + z[i / 32 + 2] += (i & 3); + if (i < 4) + z[i + 2] += i; + a[i / 32 + 2] |= 1ULL << (i & 30); + w[0][i & 1] &= ~(1L << (i / 17 * 3)); + if ((i % 23) > b[2]) + b[2] = i % 23; + if ((i % 85) > b[3]) + b[3] = i % 85; + if ((i % 192) > b[4]) + b[4] = i % 192; + } + for (i = 0; i < 9; i++) + if (a[i] != ((i < 6 && i >= 2) ? 0x55555555ULL : 0)) + __builtin_abort (); + if (b[0] != 0 || b[1] != 0 || b[2] != 22 || b[3] != 84 || b[4] != 127) + __builtin_abort (); +} + +int +main () +{ + int a[4][3][2] = {}; + static int a2[4][3][2] = {{{ 0, 0 }, { 0, 0 }, { 0, 0 }}, + {{ 312, 381 }, { 295, 356 }, { 337, 335 }}, + {{ 1041, 975 }, { 1016, 1085 }, { 935, 1060 }}, + {{ 0, 0 }, { 0, 0 }, { 0, 0 }}}; + int y[5] = { 0, 1, 1, 1, 0 }; + int y2[5] = { 0, 6561, 2401, 289, 0 }; + char z2[10] = { 0, 0, 48, 49, 50, 51, 0, 0, 0, 0 }; + long w[1][2] = { ~0L, ~0L }; + foo (&a[2], y, w, -1, 0); + if (__builtin_memcmp (a, a2, sizeof (a)) + || __builtin_memcmp (y, y2, sizeof (y)) + || __builtin_memcmp (z, z2, sizeof (z)) + || w[0][0] != ~0x249249L + || w[0][1] != ~0x249249L) + __builtin_abort (); + return 0; +} --- libgomp/testsuite/libgomp.c/reduction-12.c.jj 2015-10-16 16:40:11.638021550 +0200 +++ libgomp/testsuite/libgomp.c/reduction-12.c 2015-10-16 16:43:30.244118783 +0200 @@ -0,0 +1,94 @@ +struct A { int t; }; +struct B { char t; }; +struct C { unsigned long long t; }; +struct D { long t; }; +void +add (struct B *x, struct B *y) +{ + x->t += y->t; +} +void +zero (struct B *x) +{ + x->t = 0; +} +void +orit (struct C *x, struct C *y) +{ + y->t |= x->t; +} +#pragma omp declare reduction(+:struct A:omp_out.t += omp_in.t) +#pragma omp declare reduction(+:struct 
B:add (&omp_out, &omp_in)) initializer(zero (&omp_priv)) +#pragma omp declare reduction(*:struct A:omp_out.t *= omp_in.t) initializer(omp_priv = { 1 }) +#pragma omp declare reduction(|:struct C:orit (&omp_in, &omp_out)) +#pragma omp declare reduction(&:struct D:omp_out.t = omp_out.t & omp_in.t) initializer(omp_priv = { ~0L }) +#pragma omp declare reduction(maxb:short:omp_out = omp_in > omp_out ? omp_in : omp_out) initializer(omp_priv = -6) + +struct B z[10]; + +__attribute__((noinline, noclone)) void +foo (struct A (*x)[3][2], struct A *y, struct D w[1][2], int s, int t) +{ + struct C a[9] = {}; + short b[5] = {}; + int i; + #pragma omp parallel for reduction(+:x[-1:2][:][0:2], z[t + 2:4]) \ + reduction(*:y[-s:3]) reduction(|:a[s + 3:4]) \ + reduction(&:w[s + 1:1][t:2]) reduction(maxb:b[2:]) + for (i = 0; i < 128; i++) + { + x[i / 64 - 1][i % 3][(i / 4) & 1].t += i; + if ((i & 15) == 1) + y[1].t *= 3; + if ((i & 31) == 2) + y[2].t *= 7; + if ((i & 63) == 3) + y[3].t *= 17; + z[i / 32 + 2].t += (i & 3); + if (i < 4) + z[i + 2].t += i; + a[i / 32 + 2].t |= 1ULL << (i & 30); + w[0][i & 1].t &= ~(1L << (i / 17 * 3)); + if ((i % 23) > b[2]) + b[2] = i % 23; + if ((i % 85) > b[3]) + b[3] = i % 85; + if ((i % 192) > b[4]) + b[4] = i % 192; + } + for (i = 0; i < 9; i++) + if (a[i].t != ((i < 6 && i >= 2) ? 
0x55555555ULL : 0)) + __builtin_abort (); + if (b[0] != 0 || b[1] != 0 || b[2] != 22 || b[3] != 84 || b[4] != 127) + __builtin_abort (); +} + +int +main () +{ + struct A a[4][3][2] = {}; + static int a2[4][3][2] = {{{ 0, 0 }, { 0, 0 }, { 0, 0 }}, + {{ 312, 381 }, { 295, 356 }, { 337, 335 }}, + {{ 1041, 975 }, { 1016, 1085 }, { 935, 1060 }}, + {{ 0, 0 }, { 0, 0 }, { 0, 0 }}}; + struct A y[5] = { { 0 }, { 1 }, { 1 }, { 1 }, { 0 } }; + int y2[5] = { 0, 6561, 2401, 289, 0 }; + char z2[10] = { 0, 0, 48, 49, 50, 51, 0, 0, 0, 0 }; + struct D w[1][2] = { { { ~0L }, { ~0L } } }; + foo (&a[2], y, w, -1, 0); + int i, j, k; + for (i = 0; i < 4; i++) + for (j = 0; j < 3; j++) + for (k = 0; k < 2; k++) + if (a[i][j][k].t != a2[i][j][k]) + __builtin_abort (); + for (i = 0; i < 5; i++) + if (y[i].t != y2[i]) + __builtin_abort (); + for (i = 0; i < 10; i++) + if (z[i].t != z2[i]) + __builtin_abort (); + if (w[0][0].t != ~0x249249L || w[0][1].t != ~0x249249L) + __builtin_abort (); + return 0; +} --- libgomp/testsuite/libgomp.c/reduction-13.c.jj 2015-10-16 16:47:30.250609204 +0200 +++ libgomp/testsuite/libgomp.c/reduction-13.c 2015-10-16 16:56:20.801848488 +0200 @@ -0,0 +1,67 @@ +char z[10] = { 0 }; + +__attribute__((noinline, noclone)) void +foo (int (*x)[3][2], int *y, long w[1][2], int p1, long p2, long p3, int p4, + int p5, long p6, short p7, int s, int t) +{ + unsigned long long a[p7 + 4]; + short b[p7]; + int i; + for (i = 0; i < p7 + 4; i++) + { + if (i < p7) + b[i] = -6; + a[i] = 0; + } + #pragma omp parallel for reduction(+:x[-1:p1 + 1][:p2], z[t + 2:p3]) \ + reduction(*:y[-s:p4]) reduction(|:a[s + 3:p5]) \ + reduction(&:w[s + 1:p6 - 1][t:p6]) reduction(max:b[2:]) + for (i = 0; i < 128; i++) + { + x[i / 64 - 1][i % 3][(i / 4) & 1] += i; + if ((i & 15) == 1) + y[1] *= 3; + if ((i & 31) == 2) + y[2] *= 7; + if ((i & 63) == 3) + y[3] *= 17; + z[i / 32 + 2] += (i & 3); + if (i < 4) + z[i + 2] += i; + a[i / 32 + 2] |= 1ULL << (i & 30); + w[0][i & 1] &= ~(1L << (i / 17 * 3)); + if 
((i % 23) > b[2]) + b[2] = i % 23; + if ((i % 85) > b[3]) + b[3] = i % 85; + if ((i % 192) > b[4]) + b[4] = i % 192; + } + for (i = 0; i < 9; i++) + if (a[i] != ((i < 6 && i >= 2) ? 0x55555555ULL : 0)) + __builtin_abort (); + if (b[0] != -6 || b[1] != -6 || b[2] != 22 || b[3] != 84 || b[4] != 127) + __builtin_abort (); +} + +int +main () +{ + int a[4][3][2] = {}; + static int a2[4][3][2] = {{{ 0, 0 }, { 0, 0 }, { 0, 0 }}, + {{ 312, 381 }, { 295, 356 }, { 337, 335 }}, + {{ 1041, 975 }, { 1016, 1085 }, { 935, 1060 }}, + {{ 0, 0 }, { 0, 0 }, { 0, 0 }}}; + int y[5] = { 0, 1, 1, 1, 0 }; + int y2[5] = { 0, 6561, 2401, 289, 0 }; + char z2[10] = { 0, 0, 48, 49, 50, 51, 0, 0, 0, 0 }; + long w[1][2] = { ~0L, ~0L }; + foo (&a[2], y, w, 1, 3L, 4L, 3, 4, 2L, 5, -1, 0); + if (__builtin_memcmp (a, a2, sizeof (a)) + || __builtin_memcmp (y, y2, sizeof (y)) + || __builtin_memcmp (z, z2, sizeof (z)) + || w[0][0] != ~0x249249L + || w[0][1] != ~0x249249L) + __builtin_abort (); + return 0; +} --- libgomp/testsuite/libgomp.c/reduction-14.c.jj 2015-10-16 16:51:45.162880438 +0200 +++ libgomp/testsuite/libgomp.c/reduction-14.c 2015-10-16 16:56:38.547588910 +0200 @@ -0,0 +1,101 @@ +struct A { int t; }; +struct B { char t; }; +struct C { unsigned long long t; }; +struct D { long t; }; +void +add (struct B *x, struct B *y) +{ + x->t += y->t; +} +void +zero (struct B *x) +{ + x->t = 0; +} +void +orit (struct C *x, struct C *y) +{ + y->t |= x->t; +} +#pragma omp declare reduction(+:struct A:omp_out.t += omp_in.t) +#pragma omp declare reduction(+:struct B:add (&omp_out, &omp_in)) initializer(zero (&omp_priv)) +#pragma omp declare reduction(*:struct A:omp_out.t *= omp_in.t) initializer(omp_priv = { 1 }) +#pragma omp declare reduction(|:struct C:orit (&omp_in, &omp_out)) +#pragma omp declare reduction(&:struct D:omp_out.t = omp_out.t & omp_in.t) initializer(omp_priv = { ~0L }) +#pragma omp declare reduction(maxb:short:omp_out = omp_in > omp_out ? 
omp_in : omp_out) initializer(omp_priv = -6) + +struct B z[10]; + +__attribute__((noinline, noclone)) void +foo (struct A (*x)[3][2], struct A *y, struct D w[1][2], int p1, long p2, long p3, int p4, + int p5, long p6, short p7, int s, int t) +{ + struct C a[p7 + 4]; + short b[p7]; + int i; + for (i = 0; i < p7 + 4; i++) + { + if (i < p7) + b[i] = -6; + a[i].t = 0; + } + #pragma omp parallel for reduction(+:x[-1:p1 + 1][:p2], z[t + 2:p3]) \ + reduction(*:y[-s:p4]) reduction(|:a[s + 3:p5]) \ + reduction(&:w[s + 1:p6 - 1][t:p6]) reduction(maxb:b[2:]) + for (i = 0; i < 128; i++) + { + x[i / 64 - 1][i % 3][(i / 4) & 1].t += i; + if ((i & 15) == 1) + y[1].t *= 3; + if ((i & 31) == 2) + y[2].t *= 7; + if ((i & 63) == 3) + y[3].t *= 17; + z[i / 32 + 2].t += (i & 3); + if (i < 4) + z[i + 2].t += i; + a[i / 32 + 2].t |= 1ULL << (i & 30); + w[0][i & 1].t &= ~(1L << (i / 17 * 3)); + if ((i % 23) > b[2]) + b[2] = i % 23; + if ((i % 85) > b[3]) + b[3] = i % 85; + if ((i % 192) > b[4]) + b[4] = i % 192; + } + for (i = 0; i < 9; i++) + if (a[i].t != ((i < 6 && i >= 2) ? 
0x55555555ULL : 0)) + __builtin_abort (); + if (b[0] != -6 || b[1] != -6 || b[2] != 22 || b[3] != 84 || b[4] != 127) + __builtin_abort (); +} + +int +main () +{ + struct A a[4][3][2] = {}; + static int a2[4][3][2] = {{{ 0, 0 }, { 0, 0 }, { 0, 0 }}, + {{ 312, 381 }, { 295, 356 }, { 337, 335 }}, + {{ 1041, 975 }, { 1016, 1085 }, { 935, 1060 }}, + {{ 0, 0 }, { 0, 0 }, { 0, 0 }}}; + struct A y[5] = { { 0 }, { 1 }, { 1 }, { 1 }, { 0 } }; + int y2[5] = { 0, 6561, 2401, 289, 0 }; + char z2[10] = { 0, 0, 48, 49, 50, 51, 0, 0, 0, 0 }; + struct D w[1][2] = { { { ~0L }, { ~0L } } }; + foo (&a[2], y, w, 1, 3L, 4L, 3, 4, 2L, 5, -1, 0); + int i, j, k; + for (i = 0; i < 4; i++) + for (j = 0; j < 3; j++) + for (k = 0; k < 2; k++) + if (a[i][j][k].t != a2[i][j][k]) + __builtin_abort (); + for (i = 0; i < 5; i++) + if (y[i].t != y2[i]) + __builtin_abort (); + for (i = 0; i < 10; i++) + if (z[i].t != z2[i]) + __builtin_abort (); + if (w[0][0].t != ~0x249249L || w[0][1].t != ~0x249249L) + __builtin_abort (); + return 0; +} --- libgomp/testsuite/libgomp.c/reduction-15.c.jj 2015-10-16 17:40:17.546234529 +0200 +++ libgomp/testsuite/libgomp.c/reduction-15.c 2015-10-16 17:47:41.357746092 +0200 @@ -0,0 +1,56 @@ +extern void abort (void); +int a[16], b[16], c[16], d[5][2]; + +__attribute__((noinline, noclone)) void +foo (int x, int y) +{ + int i; + #pragma omp for schedule (static, 1) reduction (+:a[:3]) + for (i = 0; i < 64; i++) + { + a[0] += i; + a[1] += 2 * i; + a[2] += 3 * i; + } + #pragma omp for schedule (guided) reduction (+:b[4:3]) + for (i = 0; i < 64; i++) + { + b[4] += i; + b[5] += 2 * i; + b[6] += 3 * i; + } + #pragma omp for schedule (static) reduction (+:c[x:4]) + for (i = 0; i < 64; i++) + { + c[9] += i; + c[10] += 2 * i; + c[11] += 3 * i; + c[12] += 4 * i; + } + #pragma omp for reduction (+:d[x - 8:2][y:]) + for (i = 0; i < 64; i++) + { + d[1][0] += i; + d[1][1] += 2 * i; + d[2][0] += 3 * i; + d[2][1] += 4 * i; + } +} + +int +main () +{ + int i; + #pragma omp parallel + foo 
(9, 0); + for (i = 0; i < 16; i++) + if (a[i] != (i < 3 ? 64 * 63 / 2 * (i + 1) : 0) + || b[i] != ((i >= 4 && i < 7) ? 64 * 63 / 2 * (i - 3) : 0) + || c[i] != ((i >= 9 && i < 13) ? 64 * 63 / 2 * (i - 8) : 0)) + abort (); + for (i = 0; i < 5; i++) + if (d[i][0] != ((i && i <= 2) ? 64 * 63 / 2 * (2 * i - 1) : 0) + || d[i][1] != ((i && i <= 2) ? 64 * 63 / 2 * (2 * i) : 0)) + abort (); + return 0; +} --- libgomp/testsuite/libgomp.c++/reduction-11.C.jj 2015-10-16 17:12:48.767317531 +0200 +++ libgomp/testsuite/libgomp.c++/reduction-11.C 2015-10-16 17:30:41.880612286 +0200 @@ -0,0 +1,119 @@ +char z[10] = { 0 }; + +__attribute__((noinline, noclone)) void +foo (int (*&x)[3][2], int *y, long (&w)[1][2], int s, int t) +{ + unsigned long long a[9] = {}; + short b[5] = {}; + #pragma omp parallel for reduction(+:x[-1:2][:][0:2], z[t + 2:4]) \ + reduction(*:y[-s:3]) reduction(|:a[s + 3:4]) \ + reduction(&:w[s + 1:][t:2]) reduction(max:b[2:]) + for (int i = 0; i < 128; i++) + { + x[i / 64 - 1][i % 3][(i / 4) & 1] += i; + if ((i & 15) == 1) + y[1] *= 3; + if ((i & 31) == 2) + y[2] *= 7; + if ((i & 63) == 3) + y[3] *= 17; + z[i / 32 + 2] += (i & 3); + if (i < 4) + z[i + 2] += i; + a[i / 32 + 2] |= 1ULL << (i & 30); + w[0][i & 1] &= ~(1L << (i / 17 * 3)); + if ((i % 23) > b[2]) + b[2] = i % 23; + if ((i % 85) > b[3]) + b[3] = i % 85; + if ((i % 192) > b[4]) + b[4] = i % 192; + } + for (int i = 0; i < 9; i++) + if (a[i] != ((i < 6 && i >= 2) ? 
0x55555555ULL : 0)) + __builtin_abort (); + if (b[0] != 0 || b[1] != 0 || b[2] != 22 || b[3] != 84 || b[4] != 127) + __builtin_abort (); +} + +int a3[4][3][2]; +int (*p3)[3][2] = &a3[2]; +int y3[5] = { 0, 1, 1, 1, 0 }; +long w3[1][2] = { ~0L, ~0L }; +short bb[5]; + +struct S +{ + int (*&x)[3][2]; + int *y; + long (&w)[1][2]; + char z[10]; + short (&b)[5]; + unsigned long long a[9]; + S() : x(p3), y(y3), w(w3), z(), a(), b(bb) {} + __attribute__((noinline, noclone)) void foo (int s, int t); +}; + +void +S::foo (int s, int t) +{ + #pragma omp parallel for reduction(+:x[-1:2][:][0:2], z[t + 2:4]) \ + reduction(*:y[-s:3]) reduction(|:a[s + 3:4]) \ + reduction(&:w[s + 1:][t:2]) reduction(max:b[2:]) + for (int i = 0; i < 128; i++) + { + x[i / 64 - 1][i % 3][(i / 4) & 1] += i; + if ((i & 15) == 1) + y[1] *= 3; + if ((i & 31) == 2) + y[2] *= 7; + if ((i & 63) == 3) + y[3] *= 17; + z[i / 32 + 2] += (i & 3); + if (i < 4) + z[i + 2] += i; + a[i / 32 + 2] |= 1ULL << (i & 30); + w[0][i & 1] &= ~(1L << (i / 17 * 3)); + if ((i % 23) > b[2]) + b[2] = i % 23; + if ((i % 85) > b[3]) + b[3] = i % 85; + if ((i % 192) > b[4]) + b[4] = i % 192; + } +} + +int +main () +{ + int a[4][3][2] = {}; + static int a2[4][3][2] = {{{ 0, 0 }, { 0, 0 }, { 0, 0 }}, + {{ 312, 381 }, { 295, 356 }, { 337, 335 }}, + {{ 1041, 975 }, { 1016, 1085 }, { 935, 1060 }}, + {{ 0, 0 }, { 0, 0 }, { 0, 0 }}}; + int (*p)[3][2] = &a[2]; + int y[5] = { 0, 1, 1, 1, 0 }; + int y2[5] = { 0, 6561, 2401, 289, 0 }; + char z2[10] = { 0, 0, 48, 49, 50, 51, 0, 0, 0, 0 }; + long w[1][2] = { ~0L, ~0L }; + foo (p, y, w, -1, 0); + if (__builtin_memcmp (a, a2, sizeof (a)) + || __builtin_memcmp (y, y2, sizeof (y)) + || __builtin_memcmp (z, z2, sizeof (z)) + || w[0][0] != ~0x249249L + || w[0][1] != ~0x249249L) + __builtin_abort (); + S s; + s.foo (-1, 0); + for (int i = 0; i < 9; i++) + if (s.a[i] != ((i < 6 && i >= 2) ? 
0x55555555ULL : 0)) + __builtin_abort (); + if (__builtin_memcmp (a3, a2, sizeof (a3)) + || __builtin_memcmp (y3, y2, sizeof (y3)) + || __builtin_memcmp (s.z, z2, sizeof (s.z)) + || w3[0][0] != ~0x249249L + || w3[0][1] != ~0x249249L) + __builtin_abort (); + if (bb[0] != 0 || bb[1] != 0 || bb[2] != 22 || bb[3] != 84 || bb[4] != 127) + __builtin_abort (); +} --- libgomp/testsuite/libgomp.c++/reduction-12.C.jj 2015-10-16 17:19:13.717685135 +0200 +++ libgomp/testsuite/libgomp.c++/reduction-12.C 2015-10-16 17:30:52.359458902 +0200 @@ -0,0 +1,193 @@ +template <typename T> +struct A +{ + A () { t = 0; } + A (T x) { t = x; } + A (const A &x) { t = x.t; } + ~A () {} + T t; +}; +template <typename T> +struct M +{ + M () { t = 1; } + M (T x) { t = x; } + M (const M &x) { t = x.t; } + ~M () {} + T t; +}; +template <typename T> +struct B +{ + B () { t = ~(T) 0; } + B (T x) { t = x; } + B (const B &x) { t = x.t; } + ~B () {} + T t; +}; +template <typename T> +void +add (T &x, T &y) +{ + x.t += y.t; +} +template <typename T> +void +zero (T &x) +{ + x.t = 0; +} +template <typename T> +void +orit (T *x, T *y) +{ + y->t |= x->t; +} +B<long> bb; +#pragma omp declare reduction(+:A<int>:omp_out.t += omp_in.t) +#pragma omp declare reduction(+:A<char>:add (omp_out, omp_in)) initializer(zero (omp_priv)) +#pragma omp declare reduction(*:M<int>:omp_out.t *= omp_in.t) initializer(omp_priv = 1) +#pragma omp declare reduction(|:A<unsigned long long>:orit (&omp_in, &omp_out)) +#pragma omp declare reduction(&:B<long>:omp_out.t = omp_out.t & omp_in.t) initializer(orit (&omp_priv, &omp_orig)) +#pragma omp declare reduction(maxb:short:omp_out = omp_in > omp_out ? 
omp_in : omp_out) initializer(omp_priv = -6) + +A<char> z[10]; + +template <int N> +__attribute__((noinline, noclone)) void +foo (A<int> (*&x)[3][N], M<int> *y, B<long> (&w)[1][N], int p1, long p2, long p3, int p4, + int p5, long p6, short p7, int s, int t) +{ + A<unsigned long long> a[p7 + 4]; + short bb[p7]; + short (&b)[p7] = bb; + for (int i = 0; i < p7; i++) + bb[i] = -6; + #pragma omp parallel for reduction(+:x[-1:p1 + 1][:p2 + N - 2], z[t + N:p3]) \ + reduction(*:y[-s:p4]) reduction(|:a[s + 3:p5 - N + 2]) \ + reduction(&:w[s + 1:p6 - 3 + N][t:p6]) reduction(maxb:b[N:]) + for (int i = 0; i < 128; i++) + { + x[i / 64 - 1][i % 3][(i / 4) & 1].t += i; + if ((i & 15) == 1) + y[1].t *= 3; + if ((i & 31) == N) + y[2].t *= 7; + if ((i & 63) == 3) + y[N + 1].t *= 17; + z[i / 32 + 2].t += (i & 3); + if (i < 4) + z[i + N].t += i; + a[i / 32 + 2].t |= 1ULL << (i & 30); + w[0][i & 1].t &= ~(1L << (i / 17 * 3)); + if ((i % 23) > b[N]) + b[N] = i % 23; + if ((i % 85) > b[3]) + b[3] = i % 85; + if ((i % 192) > b[4]) + b[4] = i % 192; + } + for (int i = 0; i < 9; i++) + if (a[i].t != ((i < 6 && i >= 2) ? 
0x55555555ULL : 0)) + __builtin_abort (); + if (bb[0] != -6 || bb[1] != -6 || bb[N] != 22 || bb[3] != 84 || bb[4] != 127) + __builtin_abort (); +} + +A<int> a3[4][3][2]; +A<int> (*p3)[3][2] = &a3[2]; +M<int> y3[5] = { 0, 1, 1, 1, 0 }; +B<long> w3[1][2]; + +template <int N> +struct S +{ + A<int> (*&x)[3][N]; + M<int> *y; + B<long> (&w)[1][N]; + A<char> z[10]; + short b[5]; + A<unsigned long long> a[9]; + S() : x(p3), y(y3), w(w3), z(), a(), b() {} + __attribute__((noinline, noclone)) void foo (int, long, long, int, int, long, short, int, int); +}; + +template <int N> +void +S<N>::foo (int p1, long p2, long p3, int p4, int p5, long p6, short p7, int s, int t) +{ + #pragma omp parallel for reduction(+:x[-1:p1 + 1][:p2][0:N], z[t + N:p3 + N - 2]) \ + reduction(*:y[-s:p4]) reduction(|:a[s + 3:p5]) \ + reduction(&:w[s + 1:p6 - 3 + N][t:p6]) reduction(maxb:b[N:]) + for (int i = 0; i < 128; i++) + { + x[i / 64 - 1][i % 3][(i / 4) & 1].t += i; + if ((i & 15) == 1) + y[1].t *= 3; + if ((i & 31) == N) + y[2].t *= 7; + if ((i & 63) == 3) + y[N + 1].t *= 17; + z[i / 32 + 2].t += (i & 3); + if (i < 4) + z[i + N].t += i; + a[i / 32 + 2].t |= 1ULL << (i & 30); + w[0][i & 1].t &= ~(1L << (i / 17 * 3)); + if ((i % 23) > b[N]) + b[N] = i % 23; + if ((i % 85) > b[3]) + b[3] = i % 85; + if ((i % 192) > b[4]) + b[4] = i % 192; + } +} + +int +main () +{ + A<int> a[4][3][2]; + static int a2[4][3][2] = {{{ 0, 0 }, { 0, 0 }, { 0, 0 }}, + {{ 312, 381 }, { 295, 356 }, { 337, 335 }}, + {{ 1041, 975 }, { 1016, 1085 }, { 935, 1060 }}, + {{ 0, 0 }, { 0, 0 }, { 0, 0 }}}; + A<int> (*p)[3][2] = &a[2]; + M<int> y[5] = { 0, 1, 1, 1, 0 }; + int y2[5] = { 0, 6561, 2401, 289, 0 }; + char z2[10] = { 0, 0, 48, 49, 50, 51, 0, 0, 0, 0 }; + B<long> w[1][2]; + foo<2> (p, y, w, 1, 3L, 4L, 3, 4, 2L, 5, -1, 0); + for (int i = 0; i < 4; i++) + for (int j = 0; j < 3; j++) + for (int k = 0; k < 2; k++) + if (a[i][j][k].t != a2[i][j][k]) + __builtin_abort (); + for (int i = 0; i < 5; i++) + if (y[i].t != y2[i]) + 
__builtin_abort (); + for (int i = 0; i < 10; i++) + if (z[i].t != z2[i]) + __builtin_abort (); + if (w[0][0].t != ~0x249249L || w[0][1].t != ~0x249249L) + __builtin_abort (); + S<2> s; + s.foo (1, 3L, 4L, 3, 4, 2L, 5, -1, 0); + for (int i = 0; i < 9; i++) + if (s.a[i].t != ((i < 6 && i >= 2) ? 0x55555555ULL : 0)) + __builtin_abort (); + for (int i = 0; i < 4; i++) + for (int j = 0; j < 3; j++) + for (int k = 0; k < 2; k++) + if (a3[i][j][k].t != a2[i][j][k]) + __builtin_abort (); + for (int i = 0; i < 5; i++) + if (y3[i].t != y2[i]) + __builtin_abort (); + for (int i = 0; i < 10; i++) + if (s.z[i].t != z2[i]) + __builtin_abort (); + if (w3[0][0].t != ~0x249249L || w3[0][1].t != ~0x249249L) + __builtin_abort (); + if (s.b[0] != 0 || s.b[1] != 0 || s.b[2] != 22 + || s.b[3] != 84 || s.b[4] != 127) + __builtin_abort (); +} Jakub