[PATCH] C-family: Properly strip NOP_EXPR
gcc/c-family/ PR c/51628 * c-warn.c (warn_for_address_or_pointer_of_packed_member): Move NOP_EXPR check to ... (check_and_warn_address_of_packed_member): Here. gcc/testsuite/ PR c/51628 * c-c++-common/pr51628-33.c: New test. --- gcc/c-family/c-warn.c | 6 +++--- gcc/testsuite/c-c++-common/pr51628-33.c | 19 +++ 2 files changed, 22 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/c-c++-common/pr51628-33.c diff --git a/gcc/c-family/c-warn.c b/gcc/c-family/c-warn.c index 79b2d8ad449..070934ab2b6 100644 --- a/gcc/c-family/c-warn.c +++ b/gcc/c-family/c-warn.c @@ -2755,6 +2755,9 @@ check_and_warn_address_of_packed_member (tree type, tree rhs) while (TREE_CODE (rhs) == COMPOUND_EXPR) rhs = TREE_OPERAND (rhs, 1); + if (TREE_CODE (rhs) == NOP_EXPR) + rhs = TREE_OPERAND (rhs, 0); + tree context = check_address_of_packed_member (type, rhs); if (context) { @@ -2844,9 +2847,6 @@ warn_for_address_or_pointer_of_packed_member (bool convert_p, tree type, /* Get the type of the pointer pointing to. */ type = TREE_TYPE (type); - if (TREE_CODE (rhs) == NOP_EXPR) - rhs = TREE_OPERAND (rhs, 0); - check_and_warn_address_of_packed_member (type, rhs); } } diff --git a/gcc/testsuite/c-c++-common/pr51628-33.c b/gcc/testsuite/c-c++-common/pr51628-33.c new file mode 100644 index 000..0092f32202f --- /dev/null +++ b/gcc/testsuite/c-c++-common/pr51628-33.c @@ -0,0 +1,19 @@ +/* PR c/51628. */ +/* { dg-do compile } */ +/* { dg-options "-O" } */ + +struct pair_t +{ + char x; + int i[4]; +} __attribute__ ((packed, aligned (4))); + +extern struct pair_t p; +extern void bar (int *); + +void +foo (struct pair_t *p) +{ + bar (p ? p->i : (int *) 0); +/* { dg-warning "may result in an unaligned pointer value" "" { target *-*-* } .-1 } */ +} -- 2.20.1
[PATCH] C-family: Replace "may may" with "may" in warning message
gcc/c-family/ * c-warn.c (warn_for_address_or_pointer_of_packed_member): Replace "may may" with "may" in warning message. gcc/c-family/testsuite/ * gcc.dg/pr51628-20.c: Updated. * gcc.dg/pr51628-21.c: Likewise. * gcc.dg/pr51628-25.c: Likewise. --- gcc/c-family/c-warn.c | 2 +- gcc/testsuite/gcc.dg/pr51628-20.c | 2 +- gcc/testsuite/gcc.dg/pr51628-21.c | 2 +- gcc/testsuite/gcc.dg/pr51628-25.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/gcc/c-family/c-warn.c b/gcc/c-family/c-warn.c index f84736716b6..79b2d8ad449 100644 --- a/gcc/c-family/c-warn.c +++ b/gcc/c-family/c-warn.c @@ -2828,7 +2828,7 @@ warn_for_address_or_pointer_of_packed_member (bool convert_p, tree type, location_t location = EXPR_LOC_OR_LOC (rhs, input_location); warning_at (location, OPT_Waddress_of_packed_member, "converting a packed %qT pointer (alignment %d) " - "to %qT (alignment %d) may may result in an " + "to %qT (alignment %d) may result in an " "unaligned pointer value", rhstype, rhs_align, type, type_align); tree decl = TYPE_STUB_DECL (TREE_TYPE (rhstype)); diff --git a/gcc/testsuite/gcc.dg/pr51628-20.c b/gcc/testsuite/gcc.dg/pr51628-20.c index 80888283b73..bcdbff1e554 100644 --- a/gcc/testsuite/gcc.dg/pr51628-20.c +++ b/gcc/testsuite/gcc.dg/pr51628-20.c @@ -8,4 +8,4 @@ struct C { struct B b; } __attribute__ ((packed)); extern struct C *p; long* g8 (void) { return p; } -/* { dg-warning "may may result in an unaligned pointer value" "" { target *-*-* } .-1 } */ +/* { dg-warning "may result in an unaligned pointer value" "" { target *-*-* } .-1 } */ diff --git a/gcc/testsuite/gcc.dg/pr51628-21.c b/gcc/testsuite/gcc.dg/pr51628-21.c index 3077e72c8d5..0c7fab75d8a 100644 --- a/gcc/testsuite/gcc.dg/pr51628-21.c +++ b/gcc/testsuite/gcc.dg/pr51628-21.c @@ -8,4 +8,4 @@ struct C { struct B b; } __attribute__ ((packed)); extern struct C p[]; long* g8 (void) { return p; } -/* { dg-warning "may may result in an unaligned pointer value" "" { target *-*-* } .-1 } */ +/* { dg-warning 
"may result in an unaligned pointer value" "" { target *-*-* } .-1 } */ diff --git a/gcc/testsuite/gcc.dg/pr51628-25.c b/gcc/testsuite/gcc.dg/pr51628-25.c index 2fc5c028711..94a3a8fbaf2 100644 --- a/gcc/testsuite/gcc.dg/pr51628-25.c +++ b/gcc/testsuite/gcc.dg/pr51628-25.c @@ -6,4 +6,4 @@ struct B { int i; }; struct C { struct B b; } __attribute__ ((packed)); long* g8 (struct C *p) { return p; } -/* { dg-warning "may may result in an unaligned pointer value" "" { target *-*-* } .-1 } */ +/* { dg-warning "may result in an unaligned pointer value" "" { target *-*-* } .-1 } */ -- 2.20.1
Re: [patch, fortran] Fix the rest of PR 59345
On Sat, Jan 12, 2019 at 10:09:14PM +0100, Thomas Koenig wrote: > Hello world, > > this patch fixes the rest of the PR by making sure we do not > pack/unpack for function results which are either allocatable > or explicit shape arrays. > > Regression-tested. OK for trunk? > OK. -- Steve
[patch,libgfortran] PR88776 Namelist read from stdin: loss of data
Hi all, As stated in the PR, the problem turns out to be an ungraceful return after an error. Most namelist errors go through nml_err_ret. The one I am removing did not, and in the unique case of UNIT=5, after the error, it falls through and hits some code which modifies pointers to the namelist data structures. This patch fixes it. Regression tested on x86-64 and manually tested with a redirection to stdin. (cat somefile | ./a.out ) I plan to commit today as simple, along with a new testcase. Regards. Jerry 2019-01-12 Jerry DeLisle PR libfortran/88776 * io/list_read.c (namelist_read): Use nml_err_ret path on read error, not based on stdin_unit. diff --git a/libgfortran/io/list_read.c b/libgfortran/io/list_read.c index 4a7ccb3ddd5..d9af255a034 100644 --- a/libgfortran/io/list_read.c +++ b/libgfortran/io/list_read.c @@ -3614,11 +3614,7 @@ find_nml_name: while (!dtp->u.p.input_complete) { if (!nml_get_obj_data (dtp, &prev_nl, nml_err_msg, sizeof nml_err_msg)) - { - if (dtp->u.p.current_unit->unit_number != options.stdin_unit) - goto nml_err_ret; - generate_error (&dtp->common, LIBERROR_READ_VALUE, nml_err_msg); -} + goto nml_err_ret; /* Reset the previous namelist pointer if we know we are not going to be doing multiple reads within a single namelist object. */
[PATCH 9/9] [nvptx] Enable setting vector length using -fopenacc-dim -- testcases
Add some test-cases that set vector length using -fopenacc-dim. 2019-01-12 Tom de Vries * testsuite/libgomp.oacc-c-c++-common/pr85486-2.c: New test. * testsuite/libgomp.oacc-c-c++-common/vector-length-128-2.c: New test. * testsuite/libgomp.oacc-c-c++-common/vector-length-128-5.c: New test. * testsuite/libgomp.oacc-fortran/gemm-2.f90: New test. --- .../libgomp.oacc-c-c++-common/pr85486-2.c | 52 ++ .../vector-length-128-2.c | 39 +++ .../vector-length-128-5.c | 41 +++ libgomp/testsuite/libgomp.oacc-fortran/gemm-2.f90 | 80 ++ 4 files changed, 212 insertions(+) create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-2.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-5.c create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/gemm-2.f90 diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c new file mode 100644 index 000..f6ca263166d --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c @@ -0,0 +1,52 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-fopenacc-dim=::128" } */ + +/* Minimized from ref-1.C. 
*/ + +#include + +#pragma acc routine vector +void __attribute__((noinline, noclone)) +Vector (int *ptr, int n, const int inc) +{ + #pragma acc loop vector + for (unsigned ix = 0; ix < n; ix++) +ptr[ix] += inc; +} + +int +main (void) +{ + const int n = 32, m=32; + + int ary[m][n]; + unsigned ix, iy; + + for (ix = m; ix--;) +for (iy = n; iy--;) + ary[ix][iy] = (1 << 16) + (ix << 8) + iy; + + int err = 0; + +#pragma acc parallel copy (ary) + { +Vector ([0][0], m * n, (1 << 24) - (1 << 16)); + } + + for (ix = m; ix--;) +for (iy = n; iy--;) + if (ary[ix][iy] != ((1 << 24) + (ix << 8) + iy)) + { + printf ("ary[%u][%u] = %x expected %x\n", + ix, iy, ary[ix][iy], ((1 << 24) + (ix << 8) + iy)); + err++; + } + + if (err) +{ + printf ("%d failed\n", err); + return 1; +} + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-2.c new file mode 100644 index 000..8b5b2a4a92d --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-2.c @@ -0,0 +1,39 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-fopenacc-dim=::128" } */ +/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */ +/* { dg-set-target-env-var "GOMP_DEBUG" "1" } */ + +#include + +#define N 1024 + +unsigned int a[N]; +unsigned int b[N]; +unsigned int c[N]; +unsigned int n = N; + +int +main (void) +{ + for (unsigned int i = 0; i < n; ++i) +{ + a[i] = i % 3; + b[i] = i % 5; +} + +#pragma acc parallel copyin (a,b) copyout (c) + { +#pragma acc loop vector +for (unsigned int i = 0; i < n; i++) + c[i] = a[i] + b[i]; + } + + for (unsigned int i = 0; i < n; ++i) +if (c[i] != (i % 3) + (i % 5)) + abort (); + + return 0; +} + +/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 1, 128\\)" "oaccdevlow" } } */ +/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=1, vectors=128" } */ diff 
--git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-5.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-5.c new file mode 100644 index 000..e60f1c28db4 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-5.c @@ -0,0 +1,41 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-fopenacc-dim=:2:128" } */ +/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */ +/* { dg-set-target-env-var "GOMP_DEBUG" "1" } */ + +#include + +#define N 1024 + +unsigned int a[N]; +unsigned int b[N]; +unsigned int c[N]; +unsigned int n = N; + +int +main (void) +{ + for (unsigned int i = 0; i < n; ++i) +{ + a[i] = i % 3; + b[i] = i % 5; +} + +#pragma acc parallel copyin (a,b) copyout (c) + { +#pragma acc loop worker +for (unsigned int i = 0; i < 4; i++) +#pragma acc loop vector + for (unsigned int j = 0; j < n / 4; j++) + c[(i * N / 4) + j] = a[(i * N / 4) + j] + b[(i * N / 4) + j]; + } + + for (unsigned int i = 0; i < n; ++i) +if (c[i] != (i % 3) + (i % 5)) + abort (); + + return 0; +} + +/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 2, 128\\)" "oaccdevlow" } } */ +/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=2, vectors=128" } */ diff --git
[PATCH 6/9] [nvptx] Force vl32 if calling vector-partitionable routines -- test-cases
Add test-cases for "[nvptx] Force vl32 if calling vector-partitionable routines". 2018-12-17 Tom de Vries PR target/85486 * testsuite/libgomp.oacc-c-c++-common/pr85486-3.c: New test. * testsuite/libgomp.oacc-c-c++-common/pr85486.c: New test. --- .../libgomp.oacc-c-c++-common/pr85486-3.c | 54 ++ .../testsuite/libgomp.oacc-c-c++-common/pr85486.c | 51 2 files changed, 105 insertions(+) create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-3.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486.c diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-3.c new file mode 100644 index 000..a959b90c29a --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-3.c @@ -0,0 +1,54 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-set-target-env-var "GOMP_OPENACC_DIM" "::128" } */ + +/* Minimized from ref-1.C. */ + +#include + +#pragma acc routine vector +void __attribute__((noinline, noclone)) +Vector (int *ptr, int n, const int inc) +{ + #pragma acc loop vector + for (unsigned ix = 0; ix < n; ix++) +ptr[ix] += inc; +} + +int +main (void) +{ + const int n = 32, m=32; + + int ary[m][n]; + unsigned ix, iy; + + for (ix = m; ix--;) +for (iy = n; iy--;) + ary[ix][iy] = (1 << 16) + (ix << 8) + iy; + + int err = 0; + +#pragma acc parallel copy (ary) + { +Vector ([0][0], m * n, (1 << 24) - (1 << 16)); + } + + for (ix = m; ix--;) +for (iy = n; iy--;) + if (ary[ix][iy] != ((1 << 24) + (ix << 8) + iy)) + { + printf ("ary[%u][%u] = %x expected %x\n", + ix, iy, ary[ix][iy], ((1 << 24) + (ix << 8) + iy)); + err++; + } + + if (err) +{ + printf ("%d failed\n", err); + return 1; +} + + return 0; +} + +/* { dg-prune-output "using vector_length \\(32\\), ignoring runtime setting" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486.c new file mode 100644 index 000..99c08059d37 --- 
/dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486.c @@ -0,0 +1,51 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ + +/* Minimized from ref-1.C. */ + +#include + +#pragma acc routine vector +void __attribute__((noinline, noclone)) +Vector (int *ptr, int n, const int inc) +{ + #pragma acc loop vector + for (unsigned ix = 0; ix < n; ix++) +ptr[ix] += inc; +} + +int +main (void) +{ + const int n = 32, m=32; + + int ary[m][n]; + unsigned ix, iy; + + for (ix = m; ix--;) +for (iy = n; iy--;) + ary[ix][iy] = (1 << 16) + (ix << 8) + iy; + + int err = 0; + +#pragma acc parallel copy (ary) vector_length (128) /* { dg-warning "using vector_length \\(32\\) due to call to vector-partitionable routine, ignoring 128" } */ + { +Vector ([0][0], m * n, (1 << 24) - (1 << 16)); + } + + for (ix = m; ix--;) +for (iy = n; iy--;) + if (ary[ix][iy] != ((1 << 24) + (ix << 8) + iy)) + { + printf ("ary[%u][%u] = %x expected %x\n", + ix, iy, ary[ix][iy], ((1 << 24) + (ix << 8) + iy)); + err++; + } + + if (err) +{ + printf ("%d failed\n", err); + return 1; +} + + return 0; +} -- 2.16.4
[PATCH 4/9] [nvptx] Enable large vectors -- reduction testcases
Add various reduction test-cases with vector length 128. 2018-12-17 Tom de Vries * testsuite/libgomp.oacc-c-c++-common/vred2d-128.c: New test. * testsuite/libgomp.oacc-fortran/gemm.f90: New test. * testsuite/libgomp.oacc-c-c++-common/vector-length-128-10.c: New test. --- .../vector-length-128-10.c | 39 +++ .../libgomp.oacc-c-c++-common/vred2d-128.c | 55 +++ libgomp/testsuite/libgomp.oacc-fortran/gemm.f90| 79 ++ 3 files changed, 173 insertions(+) create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-10.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/vred2d-128.c create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/gemm.f90 diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-10.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-10.c new file mode 100644 index 000..0658cfde7ad --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-10.c @@ -0,0 +1,39 @@ +/* { dg-do run } */ + +#include + +#define N 1024 + +unsigned int a[N]; +unsigned int b[N]; +unsigned int c[N]; +unsigned int n = N; + +int +main (void) +{ + for (unsigned int i = 0; i < n; ++i) +{ + a[i] = i % 3; + b[i] = i % 5; +} + + unsigned int res = 1; + unsigned long long res2 = 1; +#pragma acc parallel vector_length (128) copyin (a,b) reduction (+:res, res2) copy (res, res2) + { +#pragma acc loop vector reduction (+:res, res2) +for (unsigned int i = 0; i < n; i++) + { + res += ((a[i] + b[i]) % 2); + res2 += ((a[i] + b[i]) % 2); + } + } + + if (res != 478) +abort (); + if (res2 != 478) +abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vred2d-128.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/vred2d-128.c new file mode 100644 index 000..86171d456e0 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vred2d-128.c @@ -0,0 +1,55 @@ +/* Test large vector lengths. 
*/ + +#include + +#define n 1 +int a1[n], a2[n]; + +#define gentest(name, outer, inner)\ + void name () \ + {\ + long i, j, t1, t2, t3; \ + _Pragma(outer) \ + for (i = 0; i < n; i++) \ +{ \ + t1 = 0; \ + t2 = 0; \ + _Pragma(inner) \ + for (j = i; j < n; j++) \ + { \ + t1++; \ + t2--; \ + } \ + a1[i] = t1; \ + a2[i] = t2; \ +} \ + for (i = 0; i < n; i++) \ +{ \ + assert (a1[i] == n-i); \ + assert (a2[i] == -(n-i));\ +} \ + }\ + +gentest (test1, "acc parallel loop gang vector_length (128) firstprivate (t1, t2)", +"acc loop vector reduction(+:t1) reduction(-:t2)") + +gentest (test2, "acc parallel loop gang vector_length (128) firstprivate (t1, t2)", +"acc loop worker vector reduction(+:t1) reduction(-:t2)") + +gentest (test3, "acc parallel loop gang worker vector_length (128) firstprivate (t1, t2)", +"acc loop vector reduction(+:t1) reduction(-:t2)") + +gentest (test4, "acc parallel loop firstprivate (t1, t2)", +"acc loop reduction(+:t1) reduction(-:t2)") + + +int +main () +{ + test1 (); + test2 (); + test3 (); + test4 (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-fortran/gemm.f90 b/libgomp/testsuite/libgomp.oacc-fortran/gemm.f90 new file mode 100644 index 000..de78148c7b3 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/gemm.f90 @@ -0,0 +1,79 @@ +! Exercise three levels of parallelism using SGEMM from BLAS. + +! { dg-do run } + +! Explicitly set vector_length to 128 using a vector_length clause. +subroutine openacc_sgemm_128 (m, n, k, alpha, a, b, beta, c) + integer :: m, n, k + real :: alpha, beta + real :: a(k,*), b(k,*), c(m,*) + + integer :: i, j, l + real :: temp + + !$acc parallel loop copy(c(1:m,1:n)) copyin(a(1:k,1:m),b(1:k,1:n)) vector_length (128) firstprivate (temp) + do j = 1, n + !$acc loop + do i = 1, m +temp = 0.0 +!$acc loop reduction(+:temp) +do l = 1, k + temp = temp + a(l,i)*b(l,j) +end do +if(beta == 0.0) then + c(i,j) = alpha*temp +else +
[PATCH 7/9] [nvptx] Add vector_length 64 test-cases
Add some test-cases using vector_length 64. 2019-01-10 Tom de Vries * testsuite/libgomp.oacc-c-c++-common/vector-length-64-1.c: New test. * testsuite/libgomp.oacc-c-c++-common/vector-length-64-2.c: New test. * testsuite/libgomp.oacc-c-c++-common/vector-length-64-3.c: New test. --- .../libgomp.oacc-c-c++-common/vector-length-64-1.c | 17 + .../libgomp.oacc-c-c++-common/vector-length-64-2.c | 21 + .../libgomp.oacc-c-c++-common/vector-length-64-3.c | 17 + 3 files changed, 55 insertions(+) create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-64-1.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-64-2.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-64-3.c diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-64-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-64-1.c new file mode 100644 index 000..b6ee732f863 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-64-1.c @@ -0,0 +1,17 @@ +#include +#include + +int +main (void) +{ +#pragma acc parallel vector_length (64) num_workers (16) /* { dg-warning "using num_workers \\(15\\), ignoring 16" "" { target openacc_nvidia_accel_configured } } */ + { +#pragma acc loop worker +for (unsigned int i = 0; i < 32; i++) +#pragma acc loop vector + for (unsigned int j = 0; j < 64; j++) + ; + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-64-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-64-2.c new file mode 100644 index 000..4dfbae8de91 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-64-2.c @@ -0,0 +1,21 @@ +/* { dg-set-target-env-var "GOMP_OPENACC_DIM" ":16:" } */ +/* { dg-shouldfail "" { openacc_nvidia_accel_selected } } */ + +#include +#include + +int +main (void) +{ +#pragma acc parallel vector_length (64) + { +#pragma acc loop worker +for (unsigned int i = 0; i < 32; i++) +#pragma acc loop vector + for 
(unsigned int j = 0; j < 64; j++) + ; + } + + return 0; +} +/* { dg-output "The Nvidia accelerator has insufficient barrier resources" { target openacc_nvidia_accel_selected } } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-64-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-64-3.c new file mode 100644 index 000..1acb40e8357 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-64-3.c @@ -0,0 +1,17 @@ +#include +#include + +int +main (void) +{ +#pragma acc parallel vector_length (64) + { +#pragma acc loop worker +for (unsigned int i = 0; i < 32; i++) +#pragma acc loop vector + for (unsigned int j = 0; j < 64; j++) + ; + } + + return 0; +} -- 2.16.4
[PATCH 5/9] [nvptx] Don't emit barriers for empty loops -- test-cases
Add test-cases for PR85381. 2018-12-17 Tom de Vries PR target/85381 * testsuite/libgomp.oacc-c-c++-common/pr85381-5.c: New test. * testsuite/libgomp.oacc-c-c++-common/pr85381.c: New test. --- .../libgomp.oacc-c-c++-common/pr85381-5.c | 24 ++ .../testsuite/libgomp.oacc-c-c++-common/pr85381.c | 18 2 files changed, 42 insertions(+) create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-5.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381.c diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-5.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-5.c new file mode 100644 index 000..61e7e48f0c9 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-5.c @@ -0,0 +1,24 @@ +/* { dg-additional-options "-save-temps" } */ +/* { dg-do run { target openacc_nvidia_accel_selected } } + { dg-skip-if "" { *-*-* } { "*" } { "-O2" } } */ + +#define n 1024 + +int +main (void) +{ + #pragma acc parallel vector_length(128) + { +#pragma acc loop vector +for (int i = 0; i < n; i++) + ; + +#pragma acc loop vector +for (int i = 0; i < n; i++) + ; + } + + return 0; +} + +/* { dg-final { scan-assembler-not "bar.sync" } } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381.c new file mode 100644 index 000..2864dfcf3cb --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381.c @@ -0,0 +1,18 @@ +/* { dg-additional-options "-save-temps" } */ +/* { dg-do run { target openacc_nvidia_accel_selected } } + { dg-skip-if "" { *-*-* } { "*" } { "-O2" } } */ + +int +main (void) +{ + int v1; + + #pragma acc parallel vector_length (128) + #pragma acc loop vector + for (v1 = 0; v1 < 20; v1 += 2) +; + + return 0; +} + +/* { dg-final { scan-assembler-not "bar.sync" } } */ -- 2.16.4
[PATCH 8/9] [nvptx] Enable setting vector length using -fopenacc-dim
Enable setting vector length using -fopenacc-dim, e.g. -fopenacc-dim=::128. 2019-01-12 Tom de Vries * config/nvptx/nvptx.c (nvptx_goacc_validate_dims_1): Allow setting vector length using -fopenacc-dim. * plugin/plugin-nvptx.c (nvptx_exec): Update error message. --- gcc/config/nvptx/nvptx.c | 3 ++- libgomp/plugin/plugin-nvptx.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c index 8d2740cd50f..03c0f82f4a2 100644 --- a/gcc/config/nvptx/nvptx.c +++ b/gcc/config/nvptx/nvptx.c @@ -5705,7 +5705,8 @@ nvptx_goacc_validate_dims_1 (tree decl, int dims[], int fn_level, unsigned used) if (oacc_default_dims_p) { - dims[GOMP_DIM_VECTOR] = default_vector_length; + if (dims[GOMP_DIM_VECTOR] < 0) + dims[GOMP_DIM_VECTOR] = default_vector_length; if (dims[GOMP_DIM_WORKER] < 0) dims[GOMP_DIM_WORKER] = PTX_DEFAULT_RUNTIME_DIM; if (dims[GOMP_DIM_GANG] < 0) diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c index 8912660966a..dd2bcf3083f 100644 --- a/libgomp/plugin/plugin-nvptx.c +++ b/libgomp/plugin/plugin-nvptx.c @@ -1321,7 +1321,7 @@ nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs, " region or '-fopenacc-dim=:x:' where x <= 15" "; " "or, recompile the program with 'vector_length = 32' on that" - " offloaded region" + " offloaded region or '-fopenacc-dim=::32'" ".\n"); GOMP_PLUGIN_fatal (msg, targ_fn->launch->fn, dims[GOMP_DIM_WORKER], dims[GOMP_DIM_VECTOR]); -- 2.16.4
[PATCH 0/9] [COVER-LETTER, nvptx] Add support for warp-multiple openacc vector length
I. Current state The current openacc implementation sets vector length to warp-size. There are two aspects that need to be implemented for an openacc implementation to work: communication and synchronization. Synchronization is needed at the end of worker and vector loops. Communication is needed at the start of worker and vector loops, to propagate state that has not been calculated redundantly in vector-single and worker-single mode to vector-partitioned and worker-partitioned mode. For worker loops, synchronization at the end of the loop is done using the inter-warp synchronization instruction 'bar.sync 0'. Communication is done using a buffer in shared memory (and synchronization is used to ensure that the buffer is used properly). For vector loops with warp-sized vector length, synchronization at the end of the loop is not needed, since warps are synchronized by definition. Communication is done using the intra-warp communication instruction shfl. These vector and worker schemes do not change if we nest a vector loop in a worker loop. OTOH, a vector-and-worker loop uses the worker scheme. II. Patch series This patch series adds the possibility to use warp-multiple openacc vector length. This means we can no longer rely on the same mechanisms for communication and synchronization of vector loops, and need to apply the same ones as we do for worker loops. II.a Vector loop A vector loop with warp-sized vector length looks as before. A vector loop with warp-multiple vector length looks like a simple worker loop. II.b Vector-and-worker loop A vector-and-worker loop is handled as a worker loop, as before. II.c Vector loop in worker loop A vector loop in a worker loop with warp-sized vector length looks as before. A vector loop in a worker loop with warp-multiple vector length is handled as follows. 
We use the 'bar.sync 0' instruction (which synchronizes all threads in a CTA) for worker synchronization, but to synchronize only the warps that form a vector together, we use 'bar.sync <barrier>, <thread-count>', where <barrier> uniquely identifies the vector (we use the worker id, offset by one not to clash with logical barrier resource '0' used by worker synchronization, so: %tid.y + 1). Furthermore, the fact that vectors synchronize independently means that vector state needs to be propagated independently. We handle this by allocating a state propagation buffer for each vector. So, the shared memory buffer is partitioned into a part for worker propagation, and num_worker parts for vector propagation. We'll name the first part worker-generic and the other parts worker-specific (but we've got one vector per worker, so confusingly you might also call it vector-specific). In a vector loop in a worker loop, we first transition from worker-single to worker-partitioned, and then from vector-single to vector-partitioned, which means state propagation from W0V0 to WAV0, and then state propagation from WAV0 to WAVA (using W for worker, V for vector, and A for all). For branch condition propagation however, a condition calculated in worker-single-vector-single mode is propagated from W0V0 to WAVA directly (so we use the worker-generic buffer for that). II.d Routines There's a question on how to handle vector-partitionable routines in such a scheme, given these can now be called from a context with a warp-multiple vector length, while the current implementation of routines assumes warp-sized vector length. This patch series takes a conservative approach: keep routine generation as is, and detect if we're calling a vector-partitionable routine from an offloading region, and if so we fall back to warp-sized vector length in that region. III. Testing Build and reg-tested on x86_64 with nvptx accelerator. Build and reg-tested on x86_64 with nvptx accelerator with PTX_DEFAULT_VECTOR_LENGTH set to various sizes. 
IV. Patches 1 [nvptx] Enable large vectors 2 [nvptx] Update insufficient launch message for variable vector_length 3 [nvptx] Enable large vectors -- test-cases 4 [nvptx] Enable large vectors -- reduction testcases 5 [nvptx] Don't emit barriers for empty loops -- test-cases 6 [nvptx] Force vl32 if calling vector-partitionable routines -- test-cases 7 [nvptx] Add vector_length 64 test-cases 8 [nvptx] Enable setting vector length using -fopenacc-dim 9 [nvptx] Enable setting vector length using -fopenacc-dim -- testcases Tom de Vries (9): [nvptx] Enable large vectors [nvptx] Update insufficient launch message for variable vector_length [nvptx] Enable large vectors -- test-cases [nvptx] Enable large vectors -- reduction testcases [nvptx] Don't emit barriers for empty loops -- test-cases [nvptx] Force vl32 if calling vector-partitionable routines -- test-cases [nvptx] Add vector_length 64 test-cases [nvptx] Enable setting vector length using -fopenacc-dim [nvptx] Enable setting vector length using -fopenacc-dim -- testcases
[PATCH 1/9] [nvptx] Enable large vectors
Allow vector_length clauses to accept values larger than warp size. Note that this does not enable setting vector_length to values larger than warp size using -fopenacc-dim. 2018-12-17 Tom de Vries * config/nvptx/nvptx.c (nvptx_goacc_validate_dims): Take larger vector lengths into account. * testsuite/libgomp.oacc-c-c++-common/vector-length-128-1.c: Expect vector length to be 128. * testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Expect vector length 2097152 to be reduced to 1024 instead of 32. --- gcc/config/nvptx/nvptx.c | 2 +- libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c | 4 ++-- libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-1.c | 5 ++--- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c index 1d9704543d9..8d2740cd50f 100644 --- a/gcc/config/nvptx/nvptx.c +++ b/gcc/config/nvptx/nvptx.c @@ -96,7 +96,7 @@ #define PTX_NUM_PER_WORKER_BARRIERS (PTX_CTA_NUM_BARRIERS - PTX_NUM_PER_CTA_BARRIERS) #define PTX_DEFAULT_VECTOR_LENGTH PTX_WARP_SIZE -#define PTX_MAX_VECTOR_LENGTH PTX_WARP_SIZE +#define PTX_MAX_VECTOR_LENGTH PTX_CTA_SIZE #define PTX_WORKER_LENGTH 32 #define PTX_DEFAULT_RUNTIME_DIM 0 /* Defer to runtime. 
*/ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c index 4a9854662cc..d7cd0461b53 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c @@ -350,7 +350,7 @@ int main () int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max; gangs_min = workers_min = vectors_min = INT_MAX; gangs_max = workers_max = vectors_max = INT_MIN; -#pragma acc parallel copy (vectors_actual) /* { dg-warning "using vector_length \\(32\\), ignoring 2097152" "" { target openacc_nvidia_accel_configured } } */ \ +#pragma acc parallel copy (vectors_actual) /* { dg-warning "using vector_length \\(1024\\), ignoring 2097152" "" { target openacc_nvidia_accel_configured } } */ \ vector_length (VECTORS) { if (acc_on_device (acc_device_host)) @@ -361,7 +361,7 @@ int main () else if (acc_on_device (acc_device_nvidia)) { /* The GCC nvptx back end enforces vector_length (32). */ - vectors_actual = 32; + vectors_actual = 1024; } else __builtin_abort (); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-1.c index fab5b0d25d1..18d77cc5ecb 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-1.c @@ -33,7 +33,6 @@ main (void) return 0; } -/* { dg-prune-output "using vector_length \\(32\\), ignoring 128" } */ -/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 1, 32\\)" "oaccdevlow" } } */ -/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=1, vectors=32" } */ +/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 1, 128\\)" "oaccdevlow" } } */ +/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=1, vectors=128" } */ -- 2.16.4
[PATCH 2/9] [nvptx] Update insufficient launch message for variable vector_length
Update message in nvptx libgomp plugin about insufficient resources to launch kernel, to accommodate for the fact the vector_length can now be variable. 19-01-08 Tom de Vries * plugin/plugin-nvptx.c (nvptx_exec): Update insufficient hardware resources diagnostic. --- libgomp/plugin/plugin-nvptx.c | 18 ++ 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c index c80da64c422..8912660966a 100644 --- a/libgomp/plugin/plugin-nvptx.c +++ b/libgomp/plugin/plugin-nvptx.c @@ -1297,14 +1297,16 @@ nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs, if (dims[GOMP_DIM_WORKER] * dims[GOMP_DIM_VECTOR] > targ_fn->max_threads_per_block) { - int suggest_workers - = targ_fn->max_threads_per_block / dims[GOMP_DIM_VECTOR]; - GOMP_PLUGIN_fatal ("The Nvidia accelerator has insufficient resources to" -" launch '%s' with num_workers = %d; recompile the" -" program with 'num_workers = %d' on that offloaded" -" region or '-fopenacc-dim=:%d'", -targ_fn->launch->fn, dims[GOMP_DIM_WORKER], -suggest_workers, suggest_workers); + const char *msg + = ("The Nvidia accelerator has insufficient resources to launch '%s'" + " with num_workers = %d and vector_length = %d" + "; " + "recompile the program with 'num_workers = x and vector_length = y'" + " on that offloaded region or '-fopenacc-dim=:x:y' where" + " x * y <= %d" + ".\n"); + GOMP_PLUGIN_fatal (msg, targ_fn->launch->fn, dims[GOMP_DIM_WORKER], +dims[GOMP_DIM_VECTOR], targ_fn->max_threads_per_block); } /* Check if the accelerator has sufficient barrier resources to -- 2.16.4
[PATCH 3/9] [nvptx] Enable large vectors -- test-cases
Add various test-cases with vector length 128. 2018-12-17 Tom de Vries * testsuite/libgomp.oacc-c-c++-common/vector-length-128-7.c: New test. * testsuite/libgomp.oacc-c-c++-common/vector-length-128-4.c: New test. * testsuite/libgomp.oacc-c-c++-common/vector-length-128-6.c: New test. --- .../vector-length-128-4.c | 40 + .../vector-length-128-6.c | 41 ++ .../vector-length-128-7.c | 40 + 3 files changed, 121 insertions(+) create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-4.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-6.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-7.c diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-4.c new file mode 100644 index 000..e5d1df09b8a --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-4.c @@ -0,0 +1,40 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */ +/* { dg-set-target-env-var "GOMP_DEBUG" "1" } */ + +#include + +#define N 1024 + +unsigned int a[N]; +unsigned int b[N]; +unsigned int c[N]; +unsigned int n = N; + +int +main (void) +{ + for (unsigned int i = 0; i < n; ++i) +{ + a[i] = i % 3; + b[i] = i % 5; +} + +#pragma acc parallel num_workers (2) vector_length (128) copyin (a,b) copyout (c) + { +#pragma acc loop worker +for (unsigned int i = 0; i < 4; i++) +#pragma acc loop vector + for (unsigned int j = 0; j < n / 4; j++) + c[(i * N / 4) + j] = a[(i * N / 4) + j] + b[(i * N / 4) + j]; + } + + for (unsigned int i = 0; i < n; ++i) +if (c[i] != (i % 3) + (i % 5)) + abort (); + + return 0; +} + +/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 2, 128\\)" "oaccdevlow" } } */ +/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=2, vectors=128" } */ diff --git 
a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-6.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-6.c new file mode 100644 index 000..a1f67622f84 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-6.c @@ -0,0 +1,41 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-set-target-env-var "GOMP_OPENACC_DIM" ":2:" } */ +/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */ +/* { dg-set-target-env-var "GOMP_DEBUG" "1" } */ + +#include + +#define N 1024 + +unsigned int a[N]; +unsigned int b[N]; +unsigned int c[N]; +unsigned int n = N; + +int +main (void) +{ + for (unsigned int i = 0; i < n; ++i) +{ + a[i] = i % 3; + b[i] = i % 5; +} + +#pragma acc parallel vector_length (128) copyin (a,b) copyout (c) + { +#pragma acc loop worker +for (unsigned int i = 0; i < 4; i++) +#pragma acc loop vector + for (unsigned int j = 0; j < n / 4; j++) + c[(i * N / 4) + j] = a[(i * N / 4) + j] + b[(i * N / 4) + j]; + } + + for (unsigned int i = 0; i < n; ++i) +if (c[i] != (i % 3) + (i % 5)) + abort (); + + return 0; +} + +/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 0, 128\\)" "oaccdevlow" } } */ +/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=2, vectors=128" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-7.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-7.c new file mode 100644 index 000..c419f6499b5 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-7.c @@ -0,0 +1,40 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */ +/* { dg-set-target-env-var "GOMP_DEBUG" "1" } */ + +#include + +#define N 1024 + +unsigned int a[N]; +unsigned int b[N]; +unsigned int c[N]; +unsigned int n = N; + +int +main (void) +{ + for (unsigned int i = 0; i < n; ++i) +{ + a[i] = i % 3; + b[i] 
= i % 5; +} + +#pragma acc parallel vector_length (128) copyin (a,b) copyout (c) + { +#pragma acc loop worker +for (unsigned int i = 0; i < 4; i++) +#pragma acc loop vector + for (unsigned int j = 0; j < n / 4; j++) + c[(i * N / 4) + j] = a[(i * N / 4) + j] + b[(i * N / 4) + j]; + } + + for (unsigned int i = 0; i < n; ++i) +if (c[i] != (i % 3) + (i % 5)) + abort (); + + return 0; +} + +/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 0, 128\\)" "oaccdevlow" } } */ +/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=8, vectors=128" } */ -- 2.16.4
Re: Add split_stack support for GNU/Hurd
On Sat, Jan 12, 2019 at 11:18 AM Svante Signell wrote: > > In order to prepare for inclusion of gccgo to GNU/Hurd, split stack support has > been proposed, and in reality this patch has been applied to Debian since > gcc-6. > > Please commit this patch. Later on, updated patches for gccgo in gcc-8 and > gcc-9 > will be reviewed and committed with the aid of Ian Lance Taylor and Matthias > Klose. Committed to trunk. Thanks. Ian
[patch, fortran] Fix the rest of PR 59345
Hello world, this patch fixes the rest of the PR by making sure we do not pack/unpack for function results which are either allocatable or explicit shape arrays. Regression-tested. OK for trunk? Regards Thomas 2019-01-12 Thomas Koenig PR fortran/59345 * trans-array.c (gfc_conv_array_parameter): Remove TODO. Do not pack/unpack results of functions which return an explicit-shaped or allocatable array. 2019-01-12 Thomas Koenig PR fortran/59345 * gfortran.dg/internal_pack_17.f90: New test. * gfortran.dg/alloc_comp_auto_array_3.f90: Adjust number of calls to builtin_free. Index: trans-array.c === --- trans-array.c (Revision 267829) +++ trans-array.c (Arbeitskopie) @@ -7740,7 +7740,6 @@ array_parameter_size (tree desc, gfc_expr *expr, t } /* Convert an array for passing as an actual parameter. */ -/* TODO: Optimize passing g77 arrays. */ void gfc_conv_array_parameter (gfc_se * se, gfc_expr * expr, bool g77, @@ -7866,11 +7865,23 @@ gfc_conv_array_parameter (gfc_se * se, gfc_expr * no_pack = contiguous && no_pack; - /* If we have an expression, an array temporary will be - generated which does not need to be packed / unpacked - if passed to an explicit-shape dummy array. */ + /* If we have an EXPR_OP or a function returning an explicit-shaped + or allocatable array, an array temporary will be generated which + does not need to be packed / unpacked if passed to an + explicit-shape dummy array. */ - no_pack = no_pack || (g77 && expr->expr_type == EXPR_OP); + if (g77) +{ + if (expr->expr_type == EXPR_OP) + no_pack = 1; + else if (expr->expr_type == EXPR_FUNCTION && expr->value.function.esym) + { + gfc_symbol *result = expr->value.function.esym->result; + if (result->attr.dimension + && (result->as->type == AS_EXPLICIT || result->attr.allocatable)) + no_pack = 1; + } +} /* Array constructors are always contiguous and do not need packing. */ array_constructor = g77 && !this_array_result && expr->expr_type == EXPR_ARRAY; ! { dg-do compile } ! 
{ dg-additional-options "-fdump-tree-original" } ! PR 59345 - pack/unpack was not needed here. ! Original test case by Joost VandeVondele SUBROUTINE S1(A) INTERFACE FUNCTION CONTIGUOUS_F1() RESULT(res) INTEGER :: res(5) END FUNCTION END INTERFACE CALL S2(CONTIGUOUS_F1()) END SUBROUTINE SUBROUTINE S3(A) INTERFACE FUNCTION CONTIGOUOS_F2() RESULT(res) INTEGER, ALLOCATABLE :: res(:) END FUNCTION END INTERFACE PROCEDURE(CONTIGOUOS_F2), POINTER :: A CALL S2(A()) END SUBROUTINE ! { dg-final { scan-tree-dump-not "_gfortran_internal_pack" "original" } } ! { dg-final { scan-tree-dump-not "_gfortran_internal_unpack" "original" } }
Re: [PATCH, d] Add README for process contributing to dmd and phobos
On Thu, 10 Jan 2019 at 18:26, Joseph Myers wrote: > > On Thu, 10 Jan 2019, Iain Buclaw wrote: > > > Hi, > > > > Joseph made mention that there isn't a readme documenting where > > changes to d/dmd, libphobos/libdruntime, and libphobos/src should go. > > > > I hope this clears things up. OK for trunk? > > This sort of patch is clearly covered by D maintainership. > OK. Thought it best to wait before committing, as the content is not meant for myself to understand. -- Iain
Add split_stack support for GNU/Hurd
Hello, In order to prepare for inclusion of gccgo to GNU/Hurd, split stack support has been proposed, and in reality this patch has been applied to Debian since gcc-6. Please commit this patch. Later on, updated patches for gccgo in gcc-8 and gcc-9 will be reviewed and committed with the aid of Ian Lance Taylor and Matthias Klose. Thanks! gcc/config/ChangeLog 2018-10-10 Svante Signell * gcc/config/i386/gnu.h: Enable split-stack support Index: gcc-snapshot-20181019-1.1/src/gcc/config/i386/gnu.h === --- gcc-snapshot-20181019-1.1.orig/src/gcc/config/i386/gnu.h +++ gcc-snapshot-20181019-1.1/src/gcc/config/i386/gnu.h @@ -37,11 +37,14 @@ along with GCC. If not, see
Re: ISO_Fortran_binding patch
Done as revision 267884. Thanks again. Paul On Sat, 12 Jan 2019 at 18:29, Paul Richard Thomas wrote: > > Hi Steve, > > Many thanks for the heads up. I had seen similar problems with the the > second testcase and I thought that I had fixed them. I will delete > them from the tree and will do more work to fix the problem(s). > > Cheers > > Paul > > On Sat, 12 Jan 2019 at 17:17, Steve Kargl > wrote: > > > > On Sat, Jan 12, 2019 at 09:10:27AM -0800, Steve Kargl wrote: > > > On Sat, Jan 12, 2019 at 03:28:02PM +, Paul Richard Thomas wrote: > > > > Hi Thomas, > > > > > > > > Committed as revision 267881. I removed the duplicate include file and > > > > added some documentation, as suggested. > > > > > > > > Many thanks for all the help > > > > > > > > > > Paul, > > > > > > I'm seeing the following failures. Note, I have my uncommitted > > > ENTRY patch in my tree. I won't be able to investigate for about > > > 30 minutes. > > > > > > FAIL: gfortran.dg/ISO_Fortran_binding_2.f90 -O0 execution test > > > Running /safe/sgk/gcc/gccx/gcc/testsuite/gfortran.dg/debug/debug.exp ... > > > Running /safe/sgk/gcc/gccx/gcc/testsuite/gfortran.dg/dg.exp ... > > > FAIL: gfortran.dg/ISO_Fortran_binding_2.f90 -O2 execution test > > > FAIL: gfortran.dg/ISO_Fortran_binding_2.f90 -O3 -fomit-frame-pointer > > > -funroll-loops -fpeel-loops -ftracer -finline-functions execution test > > > FAIL: gfortran.dg/ISO_Fortran_binding_2.f90 -O3 -g execution test > > > > > > > Regression testing finished faster than I thought. Doing > > > > % gmake check-fortran RUNTESTFLAGS="dg.exp=ISO_Fortran_binding_2.f90" > > ... > > === gfortran Summary === > > > > # of expected passes8 > > # of unexpected failures4 > > > > The first failure in the gfortran.log file is > > > > CFI_address: subscripts[0], is out of bounds. dv->dim[0].extent = 3 > > subscripts[0] = 3. > > CFI_address: subscripts[0], is out of bounds. dv->dim[0].extent = 3 > > subscripts[0] = -1. 
> > CFI_address: base address of C Descriptor must not be NULL. > > CFI_deallocate: Base address is already NULL. > > CFI_deallocate: C Descriptor must describe a pointer or allocatable object. > > CFI_allocate: Base address of C descriptor must be NULL. > > CFI_allocate: The object of the C descriptor must be a pointer or > > allocatable variable. > > CFI_establish: Rank must be between 0 and 15, 0 < rank (0 !< 16). > > CFI_establish: If the C Descriptor represents an allocatable variable > > (dv->attribute = 1), its base address must be NULL (dv->base_addr = NULL). > > CFI_establish: If base address is not NULL (base_addr != NULL), the > > established C descriptor is for a nonallocatable entity (attribute != 1). > > CFI_is_contiguous: Base address of C Descriptor is already NULL. > > CFI_is_contiguous: C Descriptor must describe an array (0 < dv->rank = 0). > > CFI_section: Base address of source must not be NULL. > > CFI_section: Source must describe an array (0 < source->rank, 0 !< 0). > > CFI_section: Rank of result must be equal to the rank of source minus the > > number of zeros in strides (result->rank = source->rank - zero_count, 1 != > > 1 - 1). > > CFI_section: Lower bounds must be within the bounds of the fortran array > > (source->dim[0].lower_bound <= lower_bounds[0] <= > > source->dim[0].lower_bound + source->dim[0].extent - 1, 0 <= -1 <= 99). > > CFI_section: Lower bounds must be within the bounds of the fortran array > > (source->dim[0].lower_bound <= lower_bo > > unds[0] <= source->dim[0].lower_bound + source->dim[0].extent - 1, 0 <= 100 > > <= 99). > > > > Program received signal SIGSEGV: Segmentation fault - invalid memory > > reference. > > > > Backtrace for this error: > > #0 0x71a2 in ??? > > #1 0x0 in ??? > > > > The 2nd, 3rd, and 4th failures are > > > > CFI_address: subscripts[0], is out of bounds. dv->dim[0].extent = 3 > > subscripts[0] = 3. > > CFI_address: subscripts[0], is out of bounds. dv->dim[0].extent = 3 > > subscripts[0] = -1. 
> > CFI_address: base address of C Descriptor must not be NULL. > > CFI_deallocate: Base address is already NULL. > > > > Program received signal SIGFPE: Floating-point exception - erroneous > > arithmetic operation. > > > > Backtrace for this error: > > #0 0x71a2 in ??? > > #1 0x400eed in ??? > > #2 0x4021ea in _start > > at /usr/src/lib/csu/amd64/crt1.c:76 > > #3 0x200628fff in ??? > > > > > > -- > > Steve > > > > -- > "If you can't explain it simply, you don't understand it well enough" > - Albert Einstein -- "If you can't explain it simply, you don't understand it well enough" - Albert Einstein
Set inline-unit-growth to 40
Hello, this patch sets inline-unit-growth to 40. The performance changes are - Firefox, LTO https://treeherder.mozilla.org/perf.html#/compare?originalProject=try=f7bd026e1a931b9a284d1c85c2577a72dd592820=try=74889968abcc688b8d161863566ed273c0401ee4=1=opt=1=1 After fixes to inlining priorities this makes a difference without profile feedback only. Code size growth is about 9.15% with LTO and 3.95% with LTO and profile feedback. - Firefox noLTO https://treeherder.mozilla.org/perf.html#/compare?originalProject=try=c902b72340a3dca3114f58578c1c8f3e6a1cd89c=try=4974da6f92c144a9c09765b56a564a640069ddb9=1=1=1 With about 7% code size growth - SPEC https://lnt.opensuse.org/db_default/v4/CPP/latest_runs_report?num_runs=10_percentage_change=0.02=46e2bd1143b5c60af814916d7673879b34ceb3f6%2Cc0d79cfe9c4ec30823480f2f9b256600e8e3899f - C++ benchmarks https://lnt.opensuse.org/db_default/v4/SPEC/latest_runs_report?num_runs=10_changes=on_percentage_change=0.02=46e2bd1143b5c60af814916d7673879b34ceb3f6%2Cc0d79cfe9c4ec30823480f2f9b256600e8e3899f I am not entirely happy about the code-size/performance tradeoffs but it is a concern only for programs built with -O3 or having too many inline keywords. I have looked into inlining decisions for Firefox, HHVM and Clang and the inliner gets out of growth bounds way too early and some of the more performance-aware projects already set the limit up. I will tune other metrics down to handle some of the code size problems. Honza Index: ChangeLog === --- ChangeLog (revision 267882) +++ ChangeLog (working copy) @@ -1,3 +1,7 @@ +2019-01-05 Jan Hubicka + + * params.def (inline-unit-growth): Set to 40. + 2019-01-12 Jakub Jelinek * tree-ssa-loop-ivopts.c (find_inv_vars): Fix a comment typo. 
Index: params.def === --- params.def (revision 267882) +++ params.def (working copy) @@ -227,7 +227,7 @@ DEFPARAM(PARAM_LARGE_UNIT_INSNS, DEFPARAM(PARAM_INLINE_UNIT_GROWTH, "inline-unit-growth", "How much can given compilation unit grow because of the inlining (in percent).", -20, 0, 0) +40, 0, 0) DEFPARAM(PARAM_IPCP_UNIT_GROWTH, "ipcp-unit-growth", "How much can given compilation unit grow because of the interprocedural constant propagation (in percent).",
Re: ISO_Fortran_binding patch
Hi Steve, Many thanks for the heads up. I had seen similar problems with the the second testcase and I thought that I had fixed them. I will delete them from the tree and will do more work to fix the problem(s). Cheers Paul On Sat, 12 Jan 2019 at 17:17, Steve Kargl wrote: > > On Sat, Jan 12, 2019 at 09:10:27AM -0800, Steve Kargl wrote: > > On Sat, Jan 12, 2019 at 03:28:02PM +, Paul Richard Thomas wrote: > > > Hi Thomas, > > > > > > Committed as revision 267881. I removed the duplicate include file and > > > added some documentation, as suggested. > > > > > > Many thanks for all the help > > > > > > > Paul, > > > > I'm seeing the following failures. Note, I have my uncommitted > > ENTRY patch in my tree. I won't be able to investigate for about > > 30 minutes. > > > > FAIL: gfortran.dg/ISO_Fortran_binding_2.f90 -O0 execution test > > Running /safe/sgk/gcc/gccx/gcc/testsuite/gfortran.dg/debug/debug.exp ... > > Running /safe/sgk/gcc/gccx/gcc/testsuite/gfortran.dg/dg.exp ... > > FAIL: gfortran.dg/ISO_Fortran_binding_2.f90 -O2 execution test > > FAIL: gfortran.dg/ISO_Fortran_binding_2.f90 -O3 -fomit-frame-pointer > > -funroll-loops -fpeel-loops -ftracer -finline-functions execution test > > FAIL: gfortran.dg/ISO_Fortran_binding_2.f90 -O3 -g execution test > > > > Regression testing finished faster than I thought. Doing > > % gmake check-fortran RUNTESTFLAGS="dg.exp=ISO_Fortran_binding_2.f90" > ... > === gfortran Summary === > > # of expected passes8 > # of unexpected failures4 > > The first failure in the gfortran.log file is > > CFI_address: subscripts[0], is out of bounds. dv->dim[0].extent = 3 > subscripts[0] = 3. > CFI_address: subscripts[0], is out of bounds. dv->dim[0].extent = 3 > subscripts[0] = -1. > CFI_address: base address of C Descriptor must not be NULL. > CFI_deallocate: Base address is already NULL. > CFI_deallocate: C Descriptor must describe a pointer or allocatable object. > CFI_allocate: Base address of C descriptor must be NULL. 
> CFI_allocate: The object of the C descriptor must be a pointer or allocatable > variable. > CFI_establish: Rank must be between 0 and 15, 0 < rank (0 !< 16). > CFI_establish: If the C Descriptor represents an allocatable variable > (dv->attribute = 1), its base address must be NULL (dv->base_addr = NULL). > CFI_establish: If base address is not NULL (base_addr != NULL), the > established C descriptor is for a nonallocatable entity (attribute != 1). > CFI_is_contiguous: Base address of C Descriptor is already NULL. > CFI_is_contiguous: C Descriptor must describe an array (0 < dv->rank = 0). > CFI_section: Base address of source must not be NULL. > CFI_section: Source must describe an array (0 < source->rank, 0 !< 0). > CFI_section: Rank of result must be equal to the rank of source minus the > number of zeros in strides (result->rank = source->rank - zero_count, 1 != 1 > - 1). > CFI_section: Lower bounds must be within the bounds of the fortran array > (source->dim[0].lower_bound <= lower_bounds[0] <= source->dim[0].lower_bound > + source->dim[0].extent - 1, 0 <= -1 <= 99). > CFI_section: Lower bounds must be within the bounds of the fortran array > (source->dim[0].lower_bound <= lower_bo > unds[0] <= source->dim[0].lower_bound + source->dim[0].extent - 1, 0 <= 100 > <= 99). > > Program received signal SIGSEGV: Segmentation fault - invalid memory > reference. > > Backtrace for this error: > #0 0x71a2 in ??? > #1 0x0 in ??? > > The 2nd, 3rd, and 4th failures are > > CFI_address: subscripts[0], is out of bounds. dv->dim[0].extent = 3 > subscripts[0] = 3. > CFI_address: subscripts[0], is out of bounds. dv->dim[0].extent = 3 > subscripts[0] = -1. > CFI_address: base address of C Descriptor must not be NULL. > CFI_deallocate: Base address is already NULL. > > Program received signal SIGFPE: Floating-point exception - erroneous > arithmetic operation. > > Backtrace for this error: > #0 0x71a2 in ??? > #1 0x400eed in ??? 
> #2 0x4021ea in _start > at /usr/src/lib/csu/amd64/crt1.c:76 > #3 0x200628fff in ??? > > > -- > Steve -- "If you can't explain it simply, you don't understand it well enough" - Albert Einstein
Re: [wwwdocs] Add __cpp_* feature macros to C++20 entries + other changes that have those in projects/cxx_status.html
On Sat, Jan 12, 2019 at 04:03:57PM +0100, Gerald Pfeifer wrote: > On Sat, 12 Jan 2019, Jakub Jelinek wrote: > > Ok for wwwdocs (or do you suggest something different for the P0941R2 > > implementation status)? > > I think that was a question to Jason and Jonathan wrt contents? Yeah. > Markup-wise this looks fine, and adding those links looks like a > good idea. Thanks. Jakub
Re: ISO_Fortran_binding patch
On Sat, Jan 12, 2019 at 09:10:27AM -0800, Steve Kargl wrote: > On Sat, Jan 12, 2019 at 03:28:02PM +, Paul Richard Thomas wrote: > > Hi Thomas, > > > > Committed as revision 267881. I removed the duplicate include file and > > added some documentation, as suggested. > > > > Many thanks for all the help > > > > Paul, > > I'm seeing the following failures. Note, I have my uncommitted > ENTRY patch in my tree. I won't be able to investigate for about > 30 minutes. > > FAIL: gfortran.dg/ISO_Fortran_binding_2.f90 -O0 execution test > Running /safe/sgk/gcc/gccx/gcc/testsuite/gfortran.dg/debug/debug.exp ... > Running /safe/sgk/gcc/gccx/gcc/testsuite/gfortran.dg/dg.exp ... > FAIL: gfortran.dg/ISO_Fortran_binding_2.f90 -O2 execution test > FAIL: gfortran.dg/ISO_Fortran_binding_2.f90 -O3 -fomit-frame-pointer > -funroll-loops -fpeel-loops -ftracer -finline-functions execution test > FAIL: gfortran.dg/ISO_Fortran_binding_2.f90 -O3 -g execution test > Regression testing finished faster than I thought. Doing % gmake check-fortran RUNTESTFLAGS="dg.exp=ISO_Fortran_binding_2.f90" ... === gfortran Summary === # of expected passes8 # of unexpected failures4 The first failure in the gfortran.log file is CFI_address: subscripts[0], is out of bounds. dv->dim[0].extent = 3 subscripts[0] = 3. CFI_address: subscripts[0], is out of bounds. dv->dim[0].extent = 3 subscripts[0] = -1. CFI_address: base address of C Descriptor must not be NULL. CFI_deallocate: Base address is already NULL. CFI_deallocate: C Descriptor must describe a pointer or allocatable object. CFI_allocate: Base address of C descriptor must be NULL. CFI_allocate: The object of the C descriptor must be a pointer or allocatable variable. CFI_establish: Rank must be between 0 and 15, 0 < rank (0 !< 16). CFI_establish: If the C Descriptor represents an allocatable variable (dv->attribute = 1), its base address must be NULL (dv->base_addr = NULL). 
CFI_establish: If base address is not NULL (base_addr != NULL), the established C descriptor is for a nonallocatable entity (attribute != 1). CFI_is_contiguous: Base address of C Descriptor is already NULL. CFI_is_contiguous: C Descriptor must describe an array (0 < dv->rank = 0). CFI_section: Base address of source must not be NULL. CFI_section: Source must describe an array (0 < source->rank, 0 !< 0). CFI_section: Rank of result must be equal to the rank of source minus the number of zeros in strides (result->rank = source->rank - zero_count, 1 != 1 - 1). CFI_section: Lower bounds must be within the bounds of the fortran array (source->dim[0].lower_bound <= lower_bounds[0] <= source->dim[0].lower_bound + source->dim[0].extent - 1, 0 <= -1 <= 99). CFI_section: Lower bounds must be within the bounds of the fortran array (source->dim[0].lower_bound <= lower_bo unds[0] <= source->dim[0].lower_bound + source->dim[0].extent - 1, 0 <= 100 <= 99). Program received signal SIGSEGV: Segmentation fault - invalid memory reference. Backtrace for this error: #0 0x71a2 in ??? #1 0x0 in ??? The 2nd, 3rd, and 4th failures are CFI_address: subscripts[0], is out of bounds. dv->dim[0].extent = 3 subscripts[0] = 3. CFI_address: subscripts[0], is out of bounds. dv->dim[0].extent = 3 subscripts[0] = -1. CFI_address: base address of C Descriptor must not be NULL. CFI_deallocate: Base address is already NULL. Program received signal SIGFPE: Floating-point exception - erroneous arithmetic operation. Backtrace for this error: #0 0x71a2 in ??? #1 0x400eed in ??? #2 0x4021ea in _start at /usr/src/lib/csu/amd64/crt1.c:76 #3 0x200628fff in ??? -- Steve
Re: ISO_Fortran_binding patch
On Sat, Jan 12, 2019 at 03:28:02PM +, Paul Richard Thomas wrote: > Hi Thomas, > > Committed as revision 267881. I removed the duplicate include file and > added some documentation, as suggested. > > Many thanks for all the help > Paul, I'm seeing the following failures. Note, I have my uncommitted ENTRY patch in my tree. I won't be able to investigate for about 30 minutes. FAIL: gfortran.dg/ISO_Fortran_binding_2.f90 -O0 execution test Running /safe/sgk/gcc/gccx/gcc/testsuite/gfortran.dg/debug/debug.exp ... Running /safe/sgk/gcc/gccx/gcc/testsuite/gfortran.dg/dg.exp ... FAIL: gfortran.dg/ISO_Fortran_binding_2.f90 -O2 execution test FAIL: gfortran.dg/ISO_Fortran_binding_2.f90 -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions execution test FAIL: gfortran.dg/ISO_Fortran_binding_2.f90 -O3 -g execution test -- Steve
[committed] Fix 2 comment typos
Hi! While is't is a contraction of "is it", it doesn't make sense in either of these spots and I believe isn't was meant there instead. Committed as obvious to trunk. 2019-01-12 Jakub Jelinek * tree-ssa-loop-ivopts.c (find_inv_vars): Fix a comment typo. * c-typeck.c (convert_for_assignment): Fix a comment typo. --- gcc/tree-ssa-loop-ivopts.c.jj 2019-01-10 11:43:08.936467241 +0100 +++ gcc/tree-ssa-loop-ivopts.c 2019-01-12 16:48:09.834963620 +0100 @@ -3037,7 +3037,7 @@ find_inv_vars (struct ivopts_data *data, It's hard to make decision whether constant part should be stripped or not. We choose to not strip based on below facts: 1) We need to count ADD cost for constant part if it's stripped, - which is't always trivial where this functions is called. + which isn't always trivial where this functions is called. 2) Stripping constant away may be conflict with following loop invariant hoisting pass. 3) Not stripping constant away results in more invariant exprs, --- gcc/c/c-typeck.c.jj 2019-01-01 12:37:48.607458942 +0100 +++ gcc/c/c-typeck.c2019-01-12 17:18:52.727211448 +0100 @@ -7283,7 +7283,7 @@ convert_for_assignment (location_t locat } } - /* If RHS is't an address, check pointer or array of packed + /* If RHS isn't an address, check pointer or array of packed struct or union. */ warn_for_address_or_pointer_of_packed_member (TREE_CODE (orig_rhs) != ADDR_EXPR, type, orig_rhs); Jakub
[PATCH, testsuite] Skip new charset tests on Darwin8-10.
Hi, These earlier Darwin versions have “FP_≈” inside a comment in architecture/{ppc,i386}/math.h, which is included by math.h, which causes the tests to fail. The intent of the tests (i.e. to ensure that the library itself does not emit non-ascii) is covered by other platforms, including later Darwin editions. AFAICT, this issue was fixed from Darwin11 onwards (although I have not tested every edition / looked for other possible non-ascii cases, in other headers). Since there’s no expectation that the headers would ever be updated, and it doesn’t seem worth applying fixincludes for this, let’s skip the tests on versions with the issue. Tested on powerpc-darwin9, x86_64-darwin10 and x86_64-darwin18. OK for trunk? Iain libstdc++-v3/testsuite/ * 17_intro/headers/c++1998/charset.cc: Skip for Darwin8 to Darwin10. * 17_intro/headers/c++2011/charset.cc: Likewise. * 17_intro/headers/c++2014/charset.cc: Likewise. * 17_intro/headers/c++2017/charset.cc: Likewise. * 17_intro/headers/c++2020/charset.cc: Likewise. 
diff --git a/libstdc++-v3/testsuite/17_intro/headers/c++1998/charset.cc b/libstdc++-v3/testsuite/17_intro/headers/c++1998/charset.cc index 864c64e..4425e1c 100644 --- a/libstdc++-v3/testsuite/17_intro/headers/c++1998/charset.cc +++ b/libstdc++-v3/testsuite/17_intro/headers/c++1998/charset.cc @@ -1,4 +1,5 @@ // { dg-options "-finput-charset=ascii" } // { dg-do compile } +// { dg-skip-if "non-ascii in system headers" { *-*-darwin10* *-*-darwin[89]* } } #include diff --git a/libstdc++-v3/testsuite/17_intro/headers/c++2011/charset.cc b/libstdc++-v3/testsuite/17_intro/headers/c++2011/charset.cc index 864c64e..4425e1c 100644 --- a/libstdc++-v3/testsuite/17_intro/headers/c++2011/charset.cc +++ b/libstdc++-v3/testsuite/17_intro/headers/c++2011/charset.cc @@ -1,4 +1,5 @@ // { dg-options "-finput-charset=ascii" } // { dg-do compile } +// { dg-skip-if "non-ascii in system headers" { *-*-darwin10* *-*-darwin[89]* } } #include diff --git a/libstdc++-v3/testsuite/17_intro/headers/c++2014/charset.cc b/libstdc++-v3/testsuite/17_intro/headers/c++2014/charset.cc index 864c64e..4425e1c 100644 --- a/libstdc++-v3/testsuite/17_intro/headers/c++2014/charset.cc +++ b/libstdc++-v3/testsuite/17_intro/headers/c++2014/charset.cc @@ -1,4 +1,5 @@ // { dg-options "-finput-charset=ascii" } // { dg-do compile } +// { dg-skip-if "non-ascii in system headers" { *-*-darwin10* *-*-darwin[89]* } } #include diff --git a/libstdc++-v3/testsuite/17_intro/headers/c++2017/charset.cc b/libstdc++-v3/testsuite/17_intro/headers/c++2017/charset.cc index 864c64e..4425e1c 100644 --- a/libstdc++-v3/testsuite/17_intro/headers/c++2017/charset.cc +++ b/libstdc++-v3/testsuite/17_intro/headers/c++2017/charset.cc @@ -1,4 +1,5 @@ // { dg-options "-finput-charset=ascii" } // { dg-do compile } +// { dg-skip-if "non-ascii in system headers" { *-*-darwin10* *-*-darwin[89]* } } #include diff --git a/libstdc++-v3/testsuite/17_intro/headers/c++2020/charset.cc b/libstdc++-v3/testsuite/17_intro/headers/c++2020/charset.cc index 
864c64e..4425e1c 100644 --- a/libstdc++-v3/testsuite/17_intro/headers/c++2020/charset.cc +++ b/libstdc++-v3/testsuite/17_intro/headers/c++2020/charset.cc @@ -1,4 +1,5 @@ // { dg-options "-finput-charset=ascii" } // { dg-do compile } +// { dg-skip-if "non-ascii in system headers" { *-*-darwin10* *-*-darwin[89]* } } #include
[wwwdocs] svnwrite.html - reduce references to SVN
When migrating from CVS to SVN I already reduced direct references to CVS/SVN, and expecting a migration to GIT in this decade let me take the opportunity to generalize/simplify things a bit further. Committed. (Perhaps we should also start referring to the more general term "commit" vs "check in"?) Gerald Index: svnwrite.html === RCS file: /cvs/gcc/wwwdocs/htdocs/svnwrite.html,v retrieving revision 1.42 diff -u -r1.42 svnwrite.html --- svnwrite.html 30 Sep 2018 14:38:47 - 1.42 +++ svnwrite.html 12 Jan 2019 16:23:09 - @@ -142,7 +142,7 @@ Free for all -The following changes can be made by everyone with SVN write access: +The following changes can be made by everyone with write access: Obvious fixes can be committed without prior approval. Just check in the fix and copy it to gcc-patches. A good test to @@ -207,13 +207,12 @@ when performing checkins to avoid accidental checkins of local code. -We prefer that each SVN checkin be of a complete, single logical +We prefer that each checkin be of a complete, single logical change, which may affect multiple files. The log message for that checkin should be the complete ChangeLog entry for the change. This makes it easier to correlate changes across files, and minimizes the time the repository is inconsistent. If you have several unrelated -changes, you should check them in with separate SVN commit -commands. +changes, you should check them in separately. Sync your sources with the master repository via "svn
Re: ISO_Fortran_binding patch
Hi Thomas, Committed as revision 267881. I removed the duplicate include file and added some documentation, as suggested. Many thanks for all the help Paul On Tue, 8 Jan 2019 at 23:19, Thomas Koenig wrote: > > Hi Paul, > > > This is an updated version of the earlier patch. The main addition is > > a second testcase that checks the errors emitted by the CFI API > > functions. > > I notice that the header file ISO_Fortran_binding.h is found twice > in the patch. > > Is there any particular reason why you do not want to use > > ! { dg-additional-options "-I $srcdir/../../libgfortran" } > > in the test cases, and have it only once in the source trees? > > However, I have no real strong opinion on this matter, if you > want to keep it as submitted, it is also fine. > > Therefore: OK for trunk, and thanks a lot for the patch! > > Documentation we can add at a later date, I think. > > Regards > > Thomas -- "If you can't explain it simply, you don't understand it well enough" - Albert Einstein
Re: [wwwdocs] Add __cpp_* feature macros to C++20 entries + other changes that have those in projects/cxx_status.html
On Sat, 12 Jan 2019, Jakub Jelinek wrote: > Ok for wwwdocs (or do you suggest something different for the P0941R2 > implementation status)? I think that was a question to Jason and Jonathan wrt contents? Markup-wise this looks fine, and adding those links looks like a good idea. Gerald
[C++ PATCH] Add __cpp_guaranteed_copy_elision and __cpp_nontype_template_parameter_auto
Hi! So, from what I can understand, __cpp_guaranteed_copy_elision is a C++17 P0135R1 feature test macro for a feature we claim to support, and __cpp_nontype_template_parameter_auto is a new name for the __cpp_template_auto macro (which doesn't appear anymore in the SD-6 lists, but clang++ keeps it for backwards compatibility too). Tested on x86_64-linux, ok for trunk? 2019-01-12 Jakub Jelinek * c-cppbuiltin.c (c_cpp_builtin): Define __cpp_guaranteed_copy_elision and __cpp_nontype_template_parameter_auto. Add a comment that __cpp_template_auto is deprecated. * g++.dg/cpp1z/feat-cxx1z.C: Add tests for __cpp_guaranteed_copy_elision and __cpp_nontype_template_parameter_auto feature test macros. * g++.dg/cpp2a/feat-cxx2a.C: Likewise. --- gcc/c-family/c-cppbuiltin.c.jj 2019-01-01 12:37:51.340414101 +0100 +++ gcc/c-family/c-cppbuiltin.c 2019-01-12 14:20:12.792148907 +0100 @@ -971,9 +971,13 @@ c_cpp_builtins (cpp_reader *pfile) cpp_define (pfile, "__cpp_aggregate_bases=201603"); cpp_define (pfile, "__cpp_deduction_guides=201703"); cpp_define (pfile, "__cpp_noexcept_function_type=201510"); + /* Old macro, superseded by +__cpp_nontype_template_parameter_auto. 
*/ cpp_define (pfile, "__cpp_template_auto=201606"); cpp_define (pfile, "__cpp_structured_bindings=201606"); cpp_define (pfile, "__cpp_variadic_using=201611"); + cpp_define (pfile, "__cpp_guaranteed_copy_elision=201606"); + cpp_define (pfile, "__cpp_nontype_template_parameter_auto=201606"); } if (cxx_dialect > cxx17) { --- gcc/testsuite/g++.dg/cpp1z/feat-cxx1z.C.jj 2018-10-22 09:28:06.387657035 +0200 +++ gcc/testsuite/g++.dg/cpp1z/feat-cxx1z.C 2019-01-12 14:27:15.014332003 +0100 @@ -417,6 +417,18 @@ # error "__cpp_variadic_using != 201611" #endif +#ifndef __cpp_guaranteed_copy_elision +# error "__cpp_guaranteed_copy_elision" +#elif __cpp_guaranteed_copy_elision != 201606 +# error "__cpp_guaranteed_copy_elision != 201606" +#endif + +#ifndef __cpp_nontype_template_parameter_auto +# error "__cpp_nontype_template_parameter_auto" +#elif __cpp_nontype_template_parameter_auto != 201606 +# error "__cpp_nontype_template_parameter_auto != 201606" +#endif + #ifdef __has_cpp_attribute # if ! __has_cpp_attribute(maybe_unused) --- gcc/testsuite/g++.dg/cpp2a/feat-cxx2a.C.jj 2018-11-17 00:16:41.302392263 +0100 +++ gcc/testsuite/g++.dg/cpp2a/feat-cxx2a.C 2019-01-12 14:29:03.283584283 +0100 @@ -416,6 +416,18 @@ # error "__cpp_variadic_using != 201611" #endif +#ifndef __cpp_guaranteed_copy_elision +# error "__cpp_guaranteed_copy_elision" +#elif __cpp_guaranteed_copy_elision != 201606 +# error "__cpp_guaranteed_copy_elision != 201606" +#endif + +#ifndef __cpp_nontype_template_parameter_auto +# error "__cpp_nontype_template_parameter_auto" +#elif __cpp_nontype_template_parameter_auto != 201606 +# error "__cpp_nontype_template_parameter_auto != 201606" +#endif + // C++20 features #if __cpp_conditional_explicit != 201806 Jakub
[PATCH, powerpc] Fix speculation barrier and group nop to emit target register names.
Hi, The current implementation of “speculation_barrier” and “group_end_nop” insns emit hard-wired register names which causes tests using them to fail on Darwin, at least, which uses “rNN” instead of “NN”. The patch makes the register names for these insns use the operand output mechanism to substitute the appropriate variant when needed. tested on powerpc-darwin9 and powerpc64-linux. OK for trunk? Iain gcc/ * config/rs6000/rs6000.md (group_end_nop): Emit insn register names using operand format, rather than hard-wired. (speculation_barrier): Likewise. diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 56364e0..86badc2 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -12494,15 +12494,18 @@ [(unspec [(const_int 0)] UNSPEC_GRP_END_NOP)] "" { - if (rs6000_tune == PROCESSOR_POWER6) -return "ori 1,1,0"; - return "ori 2,2,0"; + operands[0] = gen_rtx_REG (Pmode, +rs6000_tune == PROCESSOR_POWER6 ? 1 : 2); + return "ori %0,%0,0"; }) (define_insn "speculation_barrier" [(unspec_volatile:BLK [(const_int 0)] UNSPECV_SPEC_BARRIER)] "" - "ori 31,31,0") +{ + operands[0] = gen_rtx_REG (Pmode, 31); + return "ori %0,%0,0"; +}) ;; Define the subtract-one-and-jump insns, starting with the template ;; so loop.c knows what to generate.
[wwwdocs] Add __cpp_* feature macros to C++20 entries + other changes that have those in projects/cxx_status.html
Hi! On Fri, Jan 11, 2019 at 04:54:11PM +0100, Jakub Jelinek wrote: > I've noticed we don't have any feature test macros in the table for C++20, > even when a couple of the features have them defined. Here is an updated patch, that in addition to that makes 9 URLs as we now have #cxx in gcc-9/changes.html and adds missing P0941R2 entry that clang table has. For that one I'm not 100% sure what to say, I've copied all the macros from http://wg21.link/p0941r2 into two source files (attached below), one for core language features, another one for library and tested those with -std=c++2a with current trunk. Compared to what the paper lists, we have __has_cpp_attribute (carries_dependency) 0, __cpp_guaranteed_copy_elision and __cpp_nontype_template_parameter_auto not defined. Is that what we want? On the library side, __cpp_lib_any, __cpp_lib_execution, __cpp_lib_hardware_interference_size, __cpp_lib_null_iterators, __cpp_lib_parallel_algorithm, __cpp_lib_raw_memory_algorithms, __cpp_lib_to_chars, __cpp_lib_uncaught_exceptions, __cpp_lib_variant macros aren't defined (at least not in ) and __cpp_lib_optional, __cpp_lib_shared_ptr_arrays, __cpp_lib_string_view have smaller values than those in the P0941R2. Is that the desirable state given current C++2A implementation status? Ok for wwwdocs (or do you suggest something different for the P0941R2 implementation status)? 
--- htdocs/projects/cxx-status.html.jj 2019-01-12 12:27:22.966732519 +0100 +++ htdocs/projects/cxx-status.html 2019-01-12 13:57:47.457900425 +0100 @@ -115,7 +115,7 @@ Range-based for statements with initializer http://wg21.link/p0614r1;>P0614R1 - 9 + 9 @@ -127,28 +127,28 @@ ADL and function templates that are not visible http://wg21.link/p0846r0;>P0846R0 - 9 + 9 const mismatch with defaulted copy constructor http://wg21.link/p0641r2;>P0641R2 - 9 + 9 Less eager instantiation of constexpr functions http://wg21.link/p0859r0;>P0859R0 - 5.2 (mostly)9 (P0859R0) + 5.2 (mostly) 9 (P0859R0) Consistent comparison (operator=) http://wg21.link/p0515r3;>P0515R3 - http://wg21.link/P0905r1;>P0905R1 + http://wg21.link/p0905r1;>P0905R1 http://wg21.link/p1120r0;>P1120R0 No - + __cpp_impl_three_way_comparison = 201711 Access checking on specializations @@ -159,19 +159,19 @@ Default constructible and assignable stateless lambdas http://wg21.link/p0624r2;>P0624R2 - 9 + 9 Lambdas in unevaluated contexts http://wg21.link/p0315r4;>P0315R4 - 9 + 9 Language support for empty objects http://wg21.link/p0840r2;>P0840R2 - 9 + 9 @@ -195,32 +195,32 @@ Down with typename! 
http://wg21.link/p0634r3;>P0634R3 - 9 + 9 Allow pack expansion in lambda init-capture http://wg21.link/p0780r2;>P0780R2 - 9 + 9 Proposed wording for likely and unlikely attributes http://wg21.link/p0479r5;>P0479R5 - 9 + 9 Deprecate implicit capture of this via [=] http://wg21.link/p0806r2;>P0806R2 - 9 + 9 Class Types in Non-Type Template Parameters http://wg21.link/p0732r2;>P0732R2 - 9 - + 9 + __cpp_nontype_template_parameter_class = 201806 Atomic Compare-and-Exchange with Padding Bits @@ -231,19 +231,19 @@ Efficient sized delete for variable sized classes http://wg21.link/p0722r3;>P0722R3 - 9 - + 9 + __cpp_impl_destroying_delete = 201806 Allowing Virtual Function Calls in Constant Expressions http://wg21.link/p1064r0;>P1064R0 - 9 + 9 Prohibit aggregates with user-declared constructors http://wg21.link/p1008r1;>P1008R1 - 9 + 9 @@ -256,20 +256,20 @@ explicit(bool) http://wg21.link/p0892r2;>P0892R2 - 9 - + 9 + __cpp_conditional_explicit = 201806 Signed integers are two's complement http://wg21.link/p1236r1;>P1236R1 - 9 + 9 char8_t http://wg21.link/p0482r6;>P0482R6 No - + __cpp_char8_t = 201811 Immediate functions (consteval) @@ -280,23 +280,29 @@ std::is_constant_evaluated http://wg21.link/p0595r2;>P0595R2 - 9 + 9 Nested inline namespaces http://wg21.link/p1094r2;>P1094R2 - 9
Re: [PATCH] PR fortran/61765 -- Avoid ENTRY names in check of repetitive symbols
Hi Steve, This is OK for trunk. Thanks Paul On Sat, 12 Jan 2019 at 04:34, Steve Kargl wrote: > > The attached patch has been tested on x86_64-*-freebsd. There > were no regressions. The patch is less than obvious, but simple. > OK to commit? > > 2019-01-11 Steven G. Kargl > > PR fortran/61765 > * resolve.c (gfc_verify_binding_labels): Break if-elseif-elseif > structure into independent > if's with a return to simplify logic. Avoid a check for ENTRY name > with bind(c). > > 2019-01-11 Steven G. Kargl > > PR fortran/61765 > * gfortran.dg/pr61765.f90: New test. > > -- > Steve -- "If you can't explain it simply, you don't understand it well enough" - Albert Einstein
Re: [wwwdoc][Patch] Mention Loongson 3a1000 3a2000 3a3000 2k1000 support in gcc9
ping? On Mon, Dec 31, 2018 at 6:27 PM Paul Hua wrote: > > Hi Gerald, > > The attached patch mentions Loongson 3a1000 3a2000 3a3000 2k1000 support in > gcc9. > > ok for commit?
[committed][nvptx] Allow default vl to be overridden in nvptx_goacc_validate_dims_1
Hi, In nvptx_goacc_validate_dims_1, allow oacc_default_dims[DIM_VECTOR] to be overridden, by assigning it to a new variable default_vector_length at the start, and using it at the end. Committed to trunk. Thanks, - Tom [nvptx] Allow default vl to be overridden in nvptx_goacc_validate_dims_1 2019-01-11 Tom de Vries * config/nvptx/nvptx.c (nvptx_goacc_validate_dims_1): Add an use new variable default_vector_length. --- gcc/config/nvptx/nvptx.c | 17 + 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c index a4c79532a1d..95d72d0a4e0 100644 --- a/gcc/config/nvptx/nvptx.c +++ b/gcc/config/nvptx/nvptx.c @@ -5556,6 +5556,7 @@ nvptx_goacc_validate_dims_1 (tree decl, int dims[], int fn_level, unsigned used) bool offload_region_p = false; bool routine_p = false; bool routine_seq_p = false; + int default_vector_length = -1; if (decl == NULL_TREE) { @@ -5654,6 +5655,12 @@ nvptx_goacc_validate_dims_1 (tree decl, int dims[], int fn_level, unsigned used) gcc_assert (dims[GOMP_DIM_GANG] >= -1); } + if (offload_region_p) +default_vector_length = oacc_get_default_dim (GOMP_DIM_VECTOR); + else +/* oacc_default_dims_p. 
*/ +default_vector_length = PTX_DEFAULT_VECTOR_LENGTH; + int old_dims[GOMP_DIM_MAX]; unsigned int i; for (i = 0; i < GOMP_DIM_MAX; ++i) @@ -5673,12 +5680,12 @@ nvptx_goacc_validate_dims_1 (tree decl, int dims[], int fn_level, unsigned used) if (dims[GOMP_DIM_VECTOR] == 0) { vector_reason = G_("using vector_length (%d), ignoring runtime setting"); - dims[GOMP_DIM_VECTOR] = PTX_DEFAULT_VECTOR_LENGTH; + dims[GOMP_DIM_VECTOR] = default_vector_length; } if (dims[GOMP_DIM_VECTOR] > 0 && !nvptx_welformed_vector_length_p (dims[GOMP_DIM_VECTOR])) -dims[GOMP_DIM_VECTOR] = PTX_DEFAULT_VECTOR_LENGTH; +dims[GOMP_DIM_VECTOR] = default_vector_length; nvptx_apply_dim_limits (dims); @@ -5696,7 +5703,7 @@ nvptx_goacc_validate_dims_1 (tree decl, int dims[], int fn_level, unsigned used) if (oacc_default_dims_p) { - dims[GOMP_DIM_VECTOR] = PTX_DEFAULT_VECTOR_LENGTH; + dims[GOMP_DIM_VECTOR] = default_vector_length; if (dims[GOMP_DIM_WORKER] < 0) dims[GOMP_DIM_WORKER] = PTX_DEFAULT_RUNTIME_DIM; if (dims[GOMP_DIM_GANG] < 0) @@ -5715,7 +5722,9 @@ nvptx_goacc_validate_dims_1 (tree decl, int dims[], int fn_level, unsigned used) /* Function oacc_validate_dims will apply the minimal dimension. */ continue; - dims[i] = oacc_get_default_dim (i); + dims[i] = (i == GOMP_DIM_VECTOR +? default_vector_length +: oacc_get_default_dim (i)); } nvptx_apply_dim_limits (dims);
[committed][nvptx] Apply vector-partitionable routines workaround to default vl
Hi, Make "[nvptx] Force vl32 if calling vector-partitionable routines" work as well if vector length is set by modifying PTX_DEFAULT_VECTOR_LENGTH. Committed to trunk. Thanks, - Tom [nvptx] Apply vector-partitionable routines workaround to default vl 2019-01-11 Tom de Vries * config/nvptx/nvptx.c (nvptx_goacc_validate_dims_1): In offloading region calling vector-partitionable routine, set default_vector_length to WARP_SIZE. --- gcc/config/nvptx/nvptx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c index 95d72d0a4e0..1d9704543d9 100644 --- a/gcc/config/nvptx/nvptx.c +++ b/gcc/config/nvptx/nvptx.c @@ -5669,6 +5669,8 @@ nvptx_goacc_validate_dims_1 (tree decl, int dims[], int fn_level, unsigned used) const char *vector_reason = NULL; if (offload_region_p && has_vector_partitionable_routine_calls_p (decl)) { + default_vector_length = PTX_WARP_SIZE; + if (dims[GOMP_DIM_VECTOR] > PTX_WARP_SIZE) { vector_reason = G_("using vector_length (%d) due to call to"
[committed][nvptx] Verify dimension limits after applying defaults
Hi, There's a problem in oacc_validate_dims that when f.i. the worker dimension is set using -fopenacc-dim=:32, and the vector_length is set using a "vector_length (128)" clause, the compiler combines, accepts and emits the values, while the combination of the two is invalid. The reason for this is that while oacc_validate_dims validates the dimensions using targetm.goacc.validate_dims before applying default or minimum values, it does not do so afterwards. Work around this in the nvptx port by applying the defaults from oacc_default_dims at the end of nvptx_goacc_validate_dims_1, as oacc_validate_dims would do it, and then apply the dimensions limits. Committed to trunk. Thanks, - Tom [nvptx] Verify dimension limits after applying defaults 2019-01-11 Tom de Vries PR middle-end/88703 * config/nvptx/nvptx.c (nvptx_goacc_validate_dims_1): Apply defaults from oacc_default_dims, as oacc_validate_dims would do it, and apply dimensions limits. --- gcc/config/nvptx/nvptx.c | 17 + 1 file changed, 17 insertions(+) diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c index 89f0e560910..a4c79532a1d 100644 --- a/gcc/config/nvptx/nvptx.c +++ b/gcc/config/nvptx/nvptx.c @@ -5703,6 +5703,23 @@ nvptx_goacc_validate_dims_1 (tree decl, int dims[], int fn_level, unsigned used) dims[GOMP_DIM_GANG] = PTX_DEFAULT_RUNTIME_DIM; nvptx_apply_dim_limits (dims); } + + if (offload_region_p) +{ + for (i = 0; i < GOMP_DIM_MAX; i++) + { + if (!(dims[i] < 0)) + continue; + + if ((used & GOMP_DIM_MASK (i)) == 0) + /* Function oacc_validate_dims will apply the minimal dimension. */ + continue; + + dims[i] = oacc_get_default_dim (i); + } + + nvptx_apply_dim_limits (dims); +} } /* Validate compute dimensions of an OpenACC offload or routine, fill
[committed][openacc] Add used parameter to TARGET_GOACC_VALIDATE_DIMS
Hi, Add a used parameter to TARGET_GOACC_VALIDATE_DIMS, allowing a target to make decisions in the hook implementation based on whether a dimension is used or not. Committed to trunk. Thanks, - Tom [openacc] Add used parameter to TARGET_GOACC_VALIDATE_DIMS 2019-01-11 Tom de Vries * config/nvptx/nvptx.c (nvptx_goacc_validate_dims_1) (nvptx_goacc_validate_dims): Add used parameter. * doc/tm.texi: Regenerate. * omp-offload.c (oacc_parse_default_dims, oacc_validate_dims): Add argument to call to targetm.goacc.validate_dims. (default_goacc_validate_dims): Add used parameter. * target.def (validate_dims): Add used parameter in DEFHOOK. * targhooks.h (default_goacc_validate_dims): Add used parameter. --- gcc/config/nvptx/nvptx.c | 6 +++--- gcc/doc/tm.texi | 2 +- gcc/omp-offload.c| 9 + gcc/target.def | 2 +- gcc/targhooks.h | 2 +- 5 files changed, 11 insertions(+), 10 deletions(-) diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c index b37010ff58e..89f0e560910 100644 --- a/gcc/config/nvptx/nvptx.c +++ b/gcc/config/nvptx/nvptx.c @@ -5549,7 +5549,7 @@ has_vector_partitionable_routine_calls_p (tree fndecl) DIMS has changed. */ static void -nvptx_goacc_validate_dims_1 (tree decl, int dims[], int fn_level) +nvptx_goacc_validate_dims_1 (tree decl, int dims[], int fn_level, unsigned used) { bool oacc_default_dims_p = false; bool oacc_min_dims_p = false; @@ -5711,7 +5711,7 @@ nvptx_goacc_validate_dims_1 (tree decl, int dims[], int fn_level) DECL is null, we are validating the default dimensions. 
*/ static bool -nvptx_goacc_validate_dims (tree decl, int dims[], int fn_level) +nvptx_goacc_validate_dims (tree decl, int dims[], int fn_level, unsigned used) { int old_dims[GOMP_DIM_MAX]; unsigned int i; @@ -5719,7 +5719,7 @@ nvptx_goacc_validate_dims (tree decl, int dims[], int fn_level) for (i = 0; i < GOMP_DIM_MAX; ++i) old_dims[i] = dims[i]; - nvptx_goacc_validate_dims_1 (decl, dims, fn_level); + nvptx_goacc_validate_dims_1 (decl, dims, fn_level, used); gcc_assert (dims[GOMP_DIM_VECTOR] != 0); if (dims[GOMP_DIM_WORKER] > 0 && dims[GOMP_DIM_VECTOR] > 0) diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index ddde4a7287b..355fd5917af 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -6071,7 +6071,7 @@ to use it. Return number of threads in SIMT thread group on the target. @end deftypefn -@deftypefn {Target Hook} bool TARGET_GOACC_VALIDATE_DIMS (tree @var{decl}, int *@var{dims}, int @var{fn_level}) +@deftypefn {Target Hook} bool TARGET_GOACC_VALIDATE_DIMS (tree @var{decl}, int *@var{dims}, int @var{fn_level}, unsigned @var{used}) This hook should check the launch dimensions provided for an OpenACC compute region, or routine. Defaulted values are represented as -1 and non-constant values as 0. The @var{fn_level} is negative for the diff --git a/gcc/omp-offload.c b/gcc/omp-offload.c index 9cac5655c63..201c459fa1f 100644 --- a/gcc/omp-offload.c +++ b/gcc/omp-offload.c @@ -644,8 +644,8 @@ oacc_parse_default_dims (const char *dims) } /* Allow the backend to validate the dimensions. */ - targetm.goacc.validate_dims (NULL_TREE, oacc_default_dims, -1); - targetm.goacc.validate_dims (NULL_TREE, oacc_min_dims, -2); + targetm.goacc.validate_dims (NULL_TREE, oacc_default_dims, -1, 0); + targetm.goacc.validate_dims (NULL_TREE, oacc_min_dims, -2, 0); } /* Validate and update the dimensions for offloaded FN. 
ATTRS is the @@ -673,7 +673,7 @@ oacc_validate_dims (tree fn, tree attrs, int *dims, int level, unsigned used) pos = TREE_CHAIN (pos); } - bool changed = targetm.goacc.validate_dims (fn, dims, level); + bool changed = targetm.goacc.validate_dims (fn, dims, level, used); /* Default anything left to 1 or a partitioned default. */ for (ix = 0; ix != GOMP_DIM_MAX; ix++) @@ -1717,7 +1717,8 @@ execute_oacc_device_lower () bool default_goacc_validate_dims (tree ARG_UNUSED (decl), int *dims, -int ARG_UNUSED (fn_level)) +int ARG_UNUSED (fn_level), +unsigned ARG_UNUSED (used)) { bool changed = false; diff --git a/gcc/target.def b/gcc/target.def index 2aeb1ff8445..32830a1b33c 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -1686,7 +1686,7 @@ are being validated and unspecified defaults should be filled in.\n\ Diagnostics should be issued as appropriate. Return\n\ true, if changes have been made. You must override this hook to\n\ provide dimensions larger than 1.", -bool, (tree decl, int *dims, int fn_level), +bool, (tree decl, int *dims, int fn_level, unsigned used), default_goacc_validate_dims) DEFHOOK diff --git a/gcc/targhooks.h b/gcc/targhooks.h index 3b6e404f080..fb133fd3f3f 100644 --- a/gcc/targhooks.h +++ b/gcc/targhooks.h @@ -121,7 +121,7 @@ extern void default_finish_cost (void *,
[committed] Tweak m4/minloc0.m4 indentation to avoid -Wmisleading-indentation warnings (PR libfortran/88807)
Hi! The following patch reindents parts of m4/minloc0.m4 to avoid ../../../libgfortran/generated/minloc0_4_i1.c:138:5: warning: this ‘else’ clause does not guard... [-Wmisleading-indentation] etc. warnings. The indentation of the generated sources is misleading for many of the sources and fixing it properly would be harder - this patch just changes it so that we don't have else something; following statements; which -Wmisleading-indentation among other things warns about. Bootstrapped/regtested on x86_64-linux and i686-linux, preapproved by Steven in the PR, committed to trunk. If we ignored 8 spaces vs. tabs, perhaps we could define a couple of indentN m4 macros and use them in front of the various chunks provided from iforeach*.m4, so that different *.m4 files would have better control on how much it indents. But not really sure if even that would lead to something correct. 2019-01-12 Jakub Jelinek PR libfortran/88807 * m4/minloc0.m4: Reindent to avoid -Wmisleading-indentation warnings. * generated/minloc0_4_i1.c: Regenerated. * generated/minloc0_4_i2.c: Regenerated. * generated/minloc0_4_i4.c: Regenerated. * generated/minloc0_4_i8.c: Regenerated. * generated/minloc0_4_i16.c: Regenerated. * generated/minloc0_4_r4.c: Regenerated. * generated/minloc0_4_r8.c: Regenerated. * generated/minloc0_4_r10.c: Regenerated. * generated/minloc0_4_r16.c: Regenerated. * generated/minloc0_8_i1.c: Regenerated. * generated/minloc0_8_i2.c: Regenerated. * generated/minloc0_8_i4.c: Regenerated. * generated/minloc0_8_i8.c: Regenerated. * generated/minloc0_8_i16.c: Regenerated. * generated/minloc0_8_r4.c: Regenerated. * generated/minloc0_8_r8.c: Regenerated. * generated/minloc0_8_r10.c: Regenerated. * generated/minloc0_8_r16.c: Regenerated. * generated/minloc0_16_i1.c: Regenerated. * generated/minloc0_16_i2.c: Regenerated. * generated/minloc0_16_i4.c: Regenerated. * generated/minloc0_16_i8.c: Regenerated. * generated/minloc0_16_i16.c: Regenerated. 
* generated/minloc0_16_r4.c: Regenerated. * generated/minloc0_16_r8.c: Regenerated. * generated/minloc0_16_r10.c: Regenerated. * generated/minloc0_16_r16.c: Regenerated. --- libgfortran/m4/minloc0.m4.jj2019-01-01 12:38:37.03848 +0100 +++ libgfortran/m4/minloc0.m4 2019-01-12 01:11:14.779583814 +0100 @@ -63,27 +63,27 @@ FOREACH_FUNCTION( } else #endif -if (back) - do - { - if (unlikely (*base <= minval)) - { - minval = *base; - for (n = 0; n < rank; n++) - dest[n * dstride] = count[n] + 1; - } - base += sstride[0]; - } - while (++count[0] != extent[0]); -else - do -{ - if (unlikely (*base < minval)) - { - minval = *base; - for (n = 0; n < rank; n++) - dest[n * dstride] = count[n] + 1; - }') + if (back) + do + { + if (unlikely (*base <= minval)) + { + minval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + base += sstride[0]; + } + while (++count[0] != extent[0]); + else + do + { + if (unlikely (*base < minval)) + { + minval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + }') MASKED_FOREACH_FUNCTION( ` atype_name minval; int fast = 0; --- libgfortran/generated/minloc0_4_i1.c.jj 2019-01-01 12:38:33.561721355 +0100 +++ libgfortran/generated/minloc0_4_i1.c2019-01-12 01:11:21.469475297 +0100 @@ -123,27 +123,27 @@ minloc0_4_i1 (gfc_array_i4 * const restr } else #endif -if (back) - do - { - if (unlikely (*base <= minval)) - { - minval = *base; - for (n = 0; n < rank; n++) - dest[n * dstride] = count[n] + 1; - } - base += sstride[0]; - } - while (++count[0] != extent[0]); -else - do -{ - if (unlikely (*base < minval)) - { - minval = *base; - for (n = 0; n < rank; n++) - dest[n * dstride] = count[n] + 1; - } + if (back) + do + { + if (unlikely (*base <= minval)) + { + minval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + base += sstride[0]; + } + while (++count[0] != extent[0]); + else + do + { + if (unlikely (*base < minval)) + { + minval =