[PATCH] C-family: Properly strip NOP_EXPR

2019-01-12 Thread H.J. Lu
gcc/c-family/

PR c/51628
* c-warn.c (warn_for_address_or_pointer_of_packed_member): Move
NOP_EXPR check to ...
(check_and_warn_address_of_packed_member): Here.

gcc/testsuite/

PR c/51628
* c-c++-common/pr51628-33.c: New test.
---
 gcc/c-family/c-warn.c   |  6 +++---
 gcc/testsuite/c-c++-common/pr51628-33.c | 19 +++
 2 files changed, 22 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/c-c++-common/pr51628-33.c

diff --git a/gcc/c-family/c-warn.c b/gcc/c-family/c-warn.c
index 79b2d8ad449..070934ab2b6 100644
--- a/gcc/c-family/c-warn.c
+++ b/gcc/c-family/c-warn.c
@@ -2755,6 +2755,9 @@ check_and_warn_address_of_packed_member (tree type, tree 
rhs)
   while (TREE_CODE (rhs) == COMPOUND_EXPR)
rhs = TREE_OPERAND (rhs, 1);
 
+  if (TREE_CODE (rhs) == NOP_EXPR)
+   rhs = TREE_OPERAND (rhs, 0);
+
   tree context = check_address_of_packed_member (type, rhs);
   if (context)
{
@@ -2844,9 +2847,6 @@ warn_for_address_or_pointer_of_packed_member (bool 
convert_p, tree type,
   /* Get the type of the pointer pointing to.  */
   type = TREE_TYPE (type);
 
-  if (TREE_CODE (rhs) == NOP_EXPR)
-   rhs = TREE_OPERAND (rhs, 0);
-
   check_and_warn_address_of_packed_member (type, rhs);
 }
 }
diff --git a/gcc/testsuite/c-c++-common/pr51628-33.c 
b/gcc/testsuite/c-c++-common/pr51628-33.c
new file mode 100644
index 000..0092f32202f
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/pr51628-33.c
@@ -0,0 +1,19 @@
+/* PR c/51628.  */
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+struct pair_t
+{
+  char x;
+  int i[4];
+} __attribute__ ((packed, aligned (4)));
+
+extern struct pair_t p;
+extern void bar (int *);
+
+void
+foo (struct pair_t *p)
+{
+  bar (p ? p->i : (int *) 0);
+/* { dg-warning "may result in an unaligned pointer value" "" { target *-*-* } 
.-1 } */
+}
-- 
2.20.1



[PATCH] C-family: Replace "may may" with "may" in warning message

2019-01-12 Thread H.J. Lu
gcc/c-family/

* c-warn.c (warn_for_address_or_pointer_of_packed_member):
Replace "may may" with "may" in warning message.

gcc/testsuite/

* gcc.dg/pr51628-20.c: Updated.
* gcc.dg/pr51628-21.c: Likewise.
* gcc.dg/pr51628-25.c: Likewise.
---
 gcc/c-family/c-warn.c | 2 +-
 gcc/testsuite/gcc.dg/pr51628-20.c | 2 +-
 gcc/testsuite/gcc.dg/pr51628-21.c | 2 +-
 gcc/testsuite/gcc.dg/pr51628-25.c | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/gcc/c-family/c-warn.c b/gcc/c-family/c-warn.c
index f84736716b6..79b2d8ad449 100644
--- a/gcc/c-family/c-warn.c
+++ b/gcc/c-family/c-warn.c
@@ -2828,7 +2828,7 @@ warn_for_address_or_pointer_of_packed_member (bool 
convert_p, tree type,
  location_t location = EXPR_LOC_OR_LOC (rhs, input_location);
  warning_at (location, OPT_Waddress_of_packed_member,
  "converting a packed %qT pointer (alignment %d) "
- "to %qT (alignment %d) may may result in an "
+ "to %qT (alignment %d) may result in an "
  "unaligned pointer value",
  rhstype, rhs_align, type, type_align);
  tree decl = TYPE_STUB_DECL (TREE_TYPE (rhstype));
diff --git a/gcc/testsuite/gcc.dg/pr51628-20.c 
b/gcc/testsuite/gcc.dg/pr51628-20.c
index 80888283b73..bcdbff1e554 100644
--- a/gcc/testsuite/gcc.dg/pr51628-20.c
+++ b/gcc/testsuite/gcc.dg/pr51628-20.c
@@ -8,4 +8,4 @@ struct C { struct B b; } __attribute__ ((packed));
 extern struct C *p;
 
 long* g8 (void) { return p; }
-/* { dg-warning "may may result in an unaligned pointer value" "" { target 
*-*-* } .-1 } */
+/* { dg-warning "may result in an unaligned pointer value" "" { target *-*-* } 
.-1 } */
diff --git a/gcc/testsuite/gcc.dg/pr51628-21.c 
b/gcc/testsuite/gcc.dg/pr51628-21.c
index 3077e72c8d5..0c7fab75d8a 100644
--- a/gcc/testsuite/gcc.dg/pr51628-21.c
+++ b/gcc/testsuite/gcc.dg/pr51628-21.c
@@ -8,4 +8,4 @@ struct C { struct B b; } __attribute__ ((packed));
 extern struct C p[];
 
 long* g8 (void) { return p; }
-/* { dg-warning "may may result in an unaligned pointer value" "" { target 
*-*-* } .-1 } */
+/* { dg-warning "may result in an unaligned pointer value" "" { target *-*-* } 
.-1 } */
diff --git a/gcc/testsuite/gcc.dg/pr51628-25.c 
b/gcc/testsuite/gcc.dg/pr51628-25.c
index 2fc5c028711..94a3a8fbaf2 100644
--- a/gcc/testsuite/gcc.dg/pr51628-25.c
+++ b/gcc/testsuite/gcc.dg/pr51628-25.c
@@ -6,4 +6,4 @@ struct B { int i; };
 struct C { struct B b; } __attribute__ ((packed));
 
 long* g8 (struct C *p) { return p; }
-/* { dg-warning "may may result in an unaligned pointer value" "" { target 
*-*-* } .-1 } */
+/* { dg-warning "may result in an unaligned pointer value" "" { target *-*-* } 
.-1 } */
-- 
2.20.1



Re: [patch, fortran] Fix the rest of PR 59345

2019-01-12 Thread Steve Kargl
On Sat, Jan 12, 2019 at 10:09:14PM +0100, Thomas Koenig wrote:
> Hello world,
> 
> this patch fixes the rest of the PR by making sure we do not
> pack/unpack for function results which are either allocatable
> or explicit shape arrays.
> 
> Regression-tested. OK for trunk?
> 

OK.

-- 
Steve


[patch,libgfortran] PR88776 Namelist read from stdin: loss of data

2019-01-12 Thread Jerry DeLisle

Hi all,

As stated in the PR, the problem turns out to be an ungraceful return 
after an error.  Most namelist errors go through nml_err_ret.  The one I 
am removing did not, and in the unique case of UNIT=5 it falls through 
after the error and hits some code which modifies pointers to the namelist 
data structures.


This patch fixes it.

Regression tested on x86-64 and manually tested with a redirection to 
stdin. (cat somefile | ./a.out )


I plan to commit this today as a simple fix, along with a new testcase.

Regards.

Jerry

2019-01-12  Jerry DeLisle  

PR libfortran/88776
* io/list_read.c (namelist_read): Use nml_err_ret path on
read error, not based on stdin_unit.
diff --git a/libgfortran/io/list_read.c b/libgfortran/io/list_read.c
index 4a7ccb3ddd5..d9af255a034 100644
--- a/libgfortran/io/list_read.c
+++ b/libgfortran/io/list_read.c
@@ -3614,11 +3614,7 @@ find_nml_name:
   while (!dtp->u.p.input_complete)
 {
   if (!nml_get_obj_data (dtp, &prev_nl, nml_err_msg, sizeof nml_err_msg))
-	{
-	  if (dtp->u.p.current_unit->unit_number != options.stdin_unit)
-	goto nml_err_ret;
-	  generate_error (&dtp->common, LIBERROR_READ_VALUE, nml_err_msg);
-}
+	goto nml_err_ret;
 
   /* Reset the previous namelist pointer if we know we are not going
 	 to be doing multiple reads within a single namelist object.  */


[PATCH 9/9] [nvptx] Enable setting vector length using -fopenacc-dim -- testcases

2019-01-12 Thread Tom de Vries
Add some test-cases that set vector length using -fopenacc-dim.

2019-01-12  Tom de Vries  

* testsuite/libgomp.oacc-c-c++-common/pr85486-2.c: New test.
* testsuite/libgomp.oacc-c-c++-common/vector-length-128-2.c: New test.
* testsuite/libgomp.oacc-c-c++-common/vector-length-128-5.c: New test.
* testsuite/libgomp.oacc-fortran/gemm-2.f90: New test.
---
 .../libgomp.oacc-c-c++-common/pr85486-2.c  | 52 ++
 .../vector-length-128-2.c  | 39 +++
 .../vector-length-128-5.c  | 41 +++
 libgomp/testsuite/libgomp.oacc-fortran/gemm-2.f90  | 80 ++
 4 files changed, 212 insertions(+)
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c
 create mode 100644 
libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-2.c
 create mode 100644 
libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-5.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/gemm-2.f90

diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c
new file mode 100644
index 000..f6ca263166d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-fopenacc-dim=::128" } */
+
+/* Minimized from ref-1.C.  */
+
+#include <stdio.h>
+
+#pragma acc routine vector
+void __attribute__((noinline, noclone))
+Vector (int *ptr, int n, const int inc)
+{
+  #pragma acc loop vector
+  for (unsigned ix = 0; ix < n; ix++)
+ptr[ix] += inc;
+}
+
+int
+main (void)
+{
+  const int n = 32, m=32;
+
+  int ary[m][n];
+  unsigned ix,  iy;
+
+  for (ix = m; ix--;)
+for (iy = n; iy--;)
+  ary[ix][iy] = (1 << 16) + (ix << 8) + iy;
+
+  int err = 0;
+
+#pragma acc parallel copy (ary)
+  {
+Vector (&ary[0][0], m * n, (1 << 24) - (1 << 16));
+  }
+
+  for (ix = m; ix--;)
+for (iy = n; iy--;)
+  if (ary[ix][iy] != ((1 << 24) + (ix << 8) + iy))
+   {
+ printf ("ary[%u][%u] = %x expected %x\n",
+ ix, iy, ary[ix][iy], ((1 << 24) + (ix << 8) + iy));
+ err++;
+   }
+
+  if (err)
+{
+  printf ("%d failed\n", err);
+  return 1;
+}
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-2.c 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-2.c
new file mode 100644
index 000..8b5b2a4a92d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-2.c
@@ -0,0 +1,39 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-fopenacc-dim=::128" } */
+/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */
+/* { dg-set-target-env-var "GOMP_DEBUG" "1" } */
+
+#include <stdlib.h>
+
+#define N 1024
+
+unsigned int a[N];
+unsigned int b[N];
+unsigned int c[N];
+unsigned int n = N;
+
+int
+main (void)
+{
+  for (unsigned int i = 0; i < n; ++i)
+{
+  a[i] = i % 3;
+  b[i] = i % 5;
+}
+
+#pragma acc parallel copyin (a,b) copyout (c)
+  {
+#pragma acc loop vector
+for (unsigned int i = 0; i < n; i++)
+  c[i] = a[i] + b[i];
+  }
+
+  for (unsigned int i = 0; i < n; ++i)
+if (c[i] != (i % 3) + (i % 5))
+  abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 
1, 128\\)" "oaccdevlow" } } */
+/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, 
workers=1, vectors=128" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-5.c 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-5.c
new file mode 100644
index 000..e60f1c28db4
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-5.c
@@ -0,0 +1,41 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-fopenacc-dim=:2:128" } */
+/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */
+/* { dg-set-target-env-var "GOMP_DEBUG" "1" } */
+
+#include <stdlib.h>
+
+#define N 1024
+
+unsigned int a[N];
+unsigned int b[N];
+unsigned int c[N];
+unsigned int n = N;
+
+int
+main (void)
+{
+  for (unsigned int i = 0; i < n; ++i)
+{
+  a[i] = i % 3;
+  b[i] = i % 5;
+}
+
+#pragma acc parallel copyin (a,b) copyout (c)
+  {
+#pragma acc loop worker
+for (unsigned int i = 0; i < 4; i++)
+#pragma acc loop vector
+  for (unsigned int j = 0; j < n / 4; j++)
+   c[(i * N / 4) + j] = a[(i * N / 4) + j] + b[(i * N / 4) + j];
+  }
+
+  for (unsigned int i = 0; i < n; ++i)
+if (c[i] != (i % 3) + (i % 5))
+  abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 
2, 128\\)" "oaccdevlow" } } */
+/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, 
workers=2, vectors=128" } */
diff --git 

[PATCH 6/9] [nvptx] Force vl32 if calling vector-partitionable routines -- test-cases

2019-01-12 Thread Tom de Vries
Add test-cases for "[nvptx] Force vl32 if calling vector-partitionable
routines".

2018-12-17  Tom de Vries  

PR target/85486
* testsuite/libgomp.oacc-c-c++-common/pr85486-3.c: New test.
* testsuite/libgomp.oacc-c-c++-common/pr85486.c: New test.
---
 .../libgomp.oacc-c-c++-common/pr85486-3.c  | 54 ++
 .../testsuite/libgomp.oacc-c-c++-common/pr85486.c  | 51 
 2 files changed, 105 insertions(+)
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-3.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486.c

diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-3.c 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-3.c
new file mode 100644
index 000..a959b90c29a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-3.c
@@ -0,0 +1,54 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-set-target-env-var "GOMP_OPENACC_DIM" "::128" } */
+
+/* Minimized from ref-1.C.  */
+
+#include <stdio.h>
+
+#pragma acc routine vector
+void __attribute__((noinline, noclone))
+Vector (int *ptr, int n, const int inc)
+{
+  #pragma acc loop vector
+  for (unsigned ix = 0; ix < n; ix++)
+ptr[ix] += inc;
+}
+
+int
+main (void)
+{
+  const int n = 32, m=32;
+
+  int ary[m][n];
+  unsigned ix,  iy;
+
+  for (ix = m; ix--;)
+for (iy = n; iy--;)
+  ary[ix][iy] = (1 << 16) + (ix << 8) + iy;
+
+  int err = 0;
+
+#pragma acc parallel copy (ary)
+  {
+Vector (&ary[0][0], m * n, (1 << 24) - (1 << 16));
+  }
+
+  for (ix = m; ix--;)
+for (iy = n; iy--;)
+  if (ary[ix][iy] != ((1 << 24) + (ix << 8) + iy))
+   {
+ printf ("ary[%u][%u] = %x expected %x\n",
+ ix, iy, ary[ix][iy], ((1 << 24) + (ix << 8) + iy));
+ err++;
+   }
+
+  if (err)
+{
+  printf ("%d failed\n", err);
+  return 1;
+}
+
+  return 0;
+}
+
+/* { dg-prune-output "using vector_length \\(32\\), ignoring runtime setting" 
} */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486.c 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486.c
new file mode 100644
index 000..99c08059d37
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486.c
@@ -0,0 +1,51 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+
+/* Minimized from ref-1.C.  */
+
+#include <stdio.h>
+
+#pragma acc routine vector
+void __attribute__((noinline, noclone))
+Vector (int *ptr, int n, const int inc)
+{
+  #pragma acc loop vector
+  for (unsigned ix = 0; ix < n; ix++)
+ptr[ix] += inc;
+}
+
+int
+main (void)
+{
+  const int n = 32, m=32;
+
+  int ary[m][n];
+  unsigned ix,  iy;
+
+  for (ix = m; ix--;)
+for (iy = n; iy--;)
+  ary[ix][iy] = (1 << 16) + (ix << 8) + iy;
+
+  int err = 0;
+
+#pragma acc parallel copy (ary) vector_length (128) /* { dg-warning "using 
vector_length \\(32\\) due to call to vector-partitionable routine, ignoring 
128" } */
+  {
+Vector (&ary[0][0], m * n, (1 << 24) - (1 << 16));
+  }
+
+  for (ix = m; ix--;)
+for (iy = n; iy--;)
+  if (ary[ix][iy] != ((1 << 24) + (ix << 8) + iy))
+   {
+ printf ("ary[%u][%u] = %x expected %x\n",
+ ix, iy, ary[ix][iy], ((1 << 24) + (ix << 8) + iy));
+ err++;
+   }
+
+  if (err)
+{
+  printf ("%d failed\n", err);
+  return 1;
+}
+
+  return 0;
+}
-- 
2.16.4



[PATCH 4/9] [nvptx] Enable large vectors -- reduction testcases

2019-01-12 Thread Tom de Vries
Add various reduction test-cases with vector length 128.

2018-12-17  Tom de Vries  

* testsuite/libgomp.oacc-c-c++-common/vred2d-128.c: New test.
* testsuite/libgomp.oacc-fortran/gemm.f90: New test.
* testsuite/libgomp.oacc-c-c++-common/vector-length-128-10.c: New test.
---
 .../vector-length-128-10.c | 39 +++
 .../libgomp.oacc-c-c++-common/vred2d-128.c | 55 +++
 libgomp/testsuite/libgomp.oacc-fortran/gemm.f90| 79 ++
 3 files changed, 173 insertions(+)
 create mode 100644 
libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-10.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/vred2d-128.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/gemm.f90

diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-10.c 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-10.c
new file mode 100644
index 000..0658cfde7ad
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-10.c
@@ -0,0 +1,39 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+#define N 1024
+
+unsigned int a[N];
+unsigned int b[N];
+unsigned int c[N];
+unsigned int n = N;
+
+int
+main (void)
+{
+  for (unsigned int i = 0; i < n; ++i)
+{
+  a[i] = i % 3;
+  b[i] = i % 5;
+}
+
+  unsigned int res = 1;
+  unsigned long long res2 = 1;
+#pragma acc parallel vector_length (128) copyin (a,b) reduction (+:res, res2) 
copy (res, res2)
+  {
+#pragma acc loop vector reduction (+:res, res2)
+for (unsigned int i = 0; i < n; i++)
+  {
+   res += ((a[i] + b[i]) % 2);
+   res2 += ((a[i] + b[i]) % 2);
+  }
+  }
+
+  if (res != 478)
+abort ();
+  if (res2 != 478)
+abort ();
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vred2d-128.c 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/vred2d-128.c
new file mode 100644
index 000..86171d456e0
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vred2d-128.c
@@ -0,0 +1,55 @@
+/* Test large vector lengths.  */
+
+#include <assert.h>
+
+#define n 1
+int a1[n], a2[n];
+
+#define gentest(name, outer, inner)\
+  void name () \
+  {\
+  long i, j, t1, t2, t3;   \
+  _Pragma(outer)   \
+  for (i = 0; i < n; i++)  \
+{  \
+  t1 = 0;  \
+  t2 = 0;  \
+  _Pragma(inner)   \
+  for (j = i; j < n; j++)  \
+   {   \
+ t1++; \
+ t2--; \
+   }   \
+  a1[i] = t1;  \
+  a2[i] = t2;  \
+}  \
+  for (i = 0; i < n; i++)  \
+{  \
+  assert (a1[i] == n-i);   \
+  assert (a2[i] == -(n-i));\
+}  \
+  }\
+
+gentest (test1, "acc parallel loop gang vector_length (128) firstprivate (t1, 
t2)",
+"acc loop vector reduction(+:t1) reduction(-:t2)")
+
+gentest (test2, "acc parallel loop gang vector_length (128) firstprivate (t1, 
t2)",
+"acc loop worker vector reduction(+:t1) reduction(-:t2)")
+
+gentest (test3, "acc parallel loop gang worker vector_length (128) 
firstprivate (t1, t2)",
+"acc loop vector reduction(+:t1) reduction(-:t2)")
+
+gentest (test4, "acc parallel loop firstprivate (t1, t2)",
+"acc loop reduction(+:t1) reduction(-:t2)")
+
+
+int
+main ()
+{
+  test1 ();
+  test2 ();
+  test3 ();
+  test4 ();
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/gemm.f90 
b/libgomp/testsuite/libgomp.oacc-fortran/gemm.f90
new file mode 100644
index 000..de78148c7b3
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/gemm.f90
@@ -0,0 +1,79 @@
+! Exercise three levels of parallelism using SGEMM from BLAS.
+
+! { dg-do run }
+
+! Explicitly set vector_length to 128 using a vector_length clause.
+subroutine openacc_sgemm_128 (m, n, k, alpha, a, b, beta, c)
+  integer :: m, n, k
+  real :: alpha, beta
+  real :: a(k,*), b(k,*), c(m,*)
+
+  integer :: i, j, l
+  real :: temp
+
+  !$acc parallel loop copy(c(1:m,1:n)) copyin(a(1:k,1:m),b(1:k,1:n)) 
vector_length (128) firstprivate (temp)
+  do j = 1, n
+ !$acc loop
+ do i = 1, m
+temp = 0.0
+!$acc loop reduction(+:temp)
+do l = 1, k
+   temp = temp + a(l,i)*b(l,j)
+end do
+if(beta == 0.0) then
+   c(i,j) = alpha*temp
+else
+   

[PATCH 7/9] [nvptx] Add vector_length 64 test-cases

2019-01-12 Thread Tom de Vries
Add some test-cases using vector_length 64.

2019-01-10  Tom de Vries  

* testsuite/libgomp.oacc-c-c++-common/vector-length-64-1.c: New test.
* testsuite/libgomp.oacc-c-c++-common/vector-length-64-2.c: New test.
* testsuite/libgomp.oacc-c-c++-common/vector-length-64-3.c: New test.
---
 .../libgomp.oacc-c-c++-common/vector-length-64-1.c  | 17 +
 .../libgomp.oacc-c-c++-common/vector-length-64-2.c  | 21 +
 .../libgomp.oacc-c-c++-common/vector-length-64-3.c  | 17 +
 3 files changed, 55 insertions(+)
 create mode 100644 
libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-64-1.c
 create mode 100644 
libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-64-2.c
 create mode 100644 
libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-64-3.c

diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-64-1.c 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-64-1.c
new file mode 100644
index 000..b6ee732f863
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-64-1.c
@@ -0,0 +1,17 @@
+#include 
+#include 
+
+int
+main (void)
+{
+#pragma acc parallel vector_length (64) num_workers (16) /* { dg-warning 
"using num_workers \\(15\\), ignoring 16" "" { target 
openacc_nvidia_accel_configured } } */
+  {
+#pragma acc loop worker
+for (unsigned int i = 0; i < 32; i++)
+#pragma acc loop vector
+  for (unsigned int j = 0; j < 64; j++)
+   ;
+  }
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-64-2.c 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-64-2.c
new file mode 100644
index 000..4dfbae8de91
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-64-2.c
@@ -0,0 +1,21 @@
+/* { dg-set-target-env-var "GOMP_OPENACC_DIM" ":16:" } */
+/* { dg-shouldfail "" { openacc_nvidia_accel_selected } } */
+
+#include 
+#include 
+
+int
+main (void)
+{
+#pragma acc parallel vector_length (64)
+  {
+#pragma acc loop worker
+for (unsigned int i = 0; i < 32; i++)
+#pragma acc loop vector
+  for (unsigned int j = 0; j < 64; j++)
+   ;
+  }
+
+  return 0;
+}
+/* { dg-output "The Nvidia accelerator has insufficient barrier resources" { 
target openacc_nvidia_accel_selected } } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-64-3.c 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-64-3.c
new file mode 100644
index 000..1acb40e8357
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-64-3.c
@@ -0,0 +1,17 @@
+#include 
+#include 
+
+int
+main (void)
+{
+#pragma acc parallel vector_length (64)
+  {
+#pragma acc loop worker
+for (unsigned int i = 0; i < 32; i++)
+#pragma acc loop vector
+  for (unsigned int j = 0; j < 64; j++)
+   ;
+  }
+
+  return 0;
+}
-- 
2.16.4



[PATCH 5/9] [nvptx] Don't emit barriers for empty loops -- test-cases

2019-01-12 Thread Tom de Vries
Add test-cases for PR85381.

2018-12-17  Tom de Vries  

PR target/85381
* testsuite/libgomp.oacc-c-c++-common/pr85381-5.c: New test.
* testsuite/libgomp.oacc-c-c++-common/pr85381.c: New test.
---
 .../libgomp.oacc-c-c++-common/pr85381-5.c  | 24 ++
 .../testsuite/libgomp.oacc-c-c++-common/pr85381.c  | 18 
 2 files changed, 42 insertions(+)
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-5.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381.c

diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-5.c 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-5.c
new file mode 100644
index 000..61e7e48f0c9
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-5.c
@@ -0,0 +1,24 @@
+/* { dg-additional-options "-save-temps" } */
+/* { dg-do run { target openacc_nvidia_accel_selected } }
+   { dg-skip-if "" { *-*-* } { "*" } { "-O2" } } */
+
+#define n 1024
+
+int
+main (void)
+{
+  #pragma acc parallel vector_length(128)
+  {
+#pragma acc loop vector
+for (int i = 0; i < n; i++)
+  ;
+
+#pragma acc loop vector
+for (int i = 0; i < n; i++)
+  ;
+  }
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler-not "bar.sync" } } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381.c 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381.c
new file mode 100644
index 000..2864dfcf3cb
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381.c
@@ -0,0 +1,18 @@
+/* { dg-additional-options "-save-temps" } */
+/* { dg-do run { target openacc_nvidia_accel_selected } }
+   { dg-skip-if "" { *-*-* } { "*" } { "-O2" } } */
+
+int
+main (void)
+{
+  int v1;
+
+  #pragma acc parallel vector_length (128)
+  #pragma acc loop vector
+  for (v1 = 0; v1 < 20; v1 += 2)
+;
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler-not "bar.sync" } } */
-- 
2.16.4



[PATCH 8/9] [nvptx] Enable setting vector length using -fopenacc-dim

2019-01-12 Thread Tom de Vries
Enable setting vector length using -fopenacc-dim, for instance -fopenacc-dim=::128.

2019-01-12  Tom de Vries  

* config/nvptx/nvptx.c (nvptx_goacc_validate_dims_1): Allow setting
vector length using -fopenacc-dim.

* plugin/plugin-nvptx.c (nvptx_exec): Update error message.
---
 gcc/config/nvptx/nvptx.c  | 3 ++-
 libgomp/plugin/plugin-nvptx.c | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index 8d2740cd50f..03c0f82f4a2 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -5705,7 +5705,8 @@ nvptx_goacc_validate_dims_1 (tree decl, int dims[], int 
fn_level, unsigned used)
 
   if (oacc_default_dims_p)
 {
-  dims[GOMP_DIM_VECTOR] = default_vector_length;
+  if (dims[GOMP_DIM_VECTOR] < 0)
+   dims[GOMP_DIM_VECTOR] = default_vector_length;
   if (dims[GOMP_DIM_WORKER] < 0)
dims[GOMP_DIM_WORKER] = PTX_DEFAULT_RUNTIME_DIM;
   if (dims[GOMP_DIM_GANG] < 0)
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index 8912660966a..dd2bcf3083f 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -1321,7 +1321,7 @@ nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, 
void **devaddrs,
   " region or '-fopenacc-dim=:x:' where x <= 15"
   "; "
   "or, recompile the program with 'vector_length = 32' on that"
-  " offloaded region"
+  " offloaded region or '-fopenacc-dim=::32'"
   ".\n");
GOMP_PLUGIN_fatal (msg, targ_fn->launch->fn, dims[GOMP_DIM_WORKER],
   dims[GOMP_DIM_VECTOR]);
-- 
2.16.4



[PATCH 0/9] [COVER-LETTER, nvptx] Add support for warp-multiple openacc vector length

2019-01-12 Thread Tom de Vries
I. Current state

The current openacc implementation sets vector length to warp-size.

There are two aspects that need to be implemented for an openacc implementation
to work: communication and synchronization.  Synchronization is needed at the
end of worker and vector loops.  Communication is needed at the start of worker
and vector loops, to propagate state that has not been calculated redundantly in
vector-single and worker-single mode to vector-partitioned and worker-partitioned
mode.

For worker loops, synchronization at the end of the loop is done using the
inter-warp synchronization instruction 'bar.sync 0'.  Communication is done
using a buffer in shared memory (and synchronization is used to ensure that the
buffer is used properly).

For vector loops with warp-sized vector length, synchronization at the end of
the loop is not needed, since warps are synchronized by definition.
Communication is done using the intra-warp communication instruction shfl.

These vector and worker schemes do not change if we nest a vector loop in a
worker loop.  OTOH, a vector-and-worker loop uses the worker scheme.

II. Patch series

This patch series adds the possibility to use warp-multiple openacc vector
length.

This means we can no longer rely on the same mechanisms for communication and
synchronization of vector loops, and need to apply the same ones as we do for
worker loops.

II.a Vector loop

A vector loop with warp-sized vector length looks as before.  A vector loop with
warp-multiple vector length looks like a simple worker loop.

II.b Vector-and-worker loop

A vector-and-worker loop is handled as a worker loop, as before.

II.c Vector loop in worker loop

A vector loop in a worker loop with warp-sized vector length looks as before.

A vector loop in a worker loop with warp-multiple vector length is handled as
follows.

We use the 'bar.sync 0' instruction (which synchronizes all threads in a CTA)
for worker synchronization, but to synchronize only the warps that form a
vector together, we use 'bar.sync <barrier>, <thread-count>', where <barrier>
uniquely identifies the vector (we use the worker id, offset by one not to clash
with logical barrier resource '0' used by worker synchronization, so: %tid.y + 1).

Furthermore, the fact that vectors synchronize independently means that vector
state needs to be propagated independently.  We handle this by allocating a
state propagation buffer for each vector.  So, the shared memory buffer is
partitioned into a part for worker propagation, and num_worker parts for vector
propagation.

We'll name the first part worker-generic and the other parts worker-specific
(but we've got one vector per worker, so confusingly you might also call it
vector-specific).

In a vector loop in a worker loop, we first transition from worker-single to
worker-partitioned, and then from vector-single to vector-partitioned, which
means state propagation from W0V0 to WAV0, and then state propagation from WAV0
to WAVA (using W for worker, V for vector, and A for all).
For branch condition propagation however, a condition calculated in
worker-single-vector-single mode is propagated from W0V0 to WAVA directly (so
we use the worker-generic buffer for that).

II.d Routines

There's a question on how to handle vector-partitionable routines in such a
scheme, given these can now be called from a context with a warp-multiple vector
length, while the current implementation of routines assumes warp-sized vector
length.  This patch series takes a conservative approach: keep routine
generation as is, and detect if we're calling a vector-partitionable routine
from an offloading region, and if so we fall back to warp-sized vector length
in that region.

III. Testing

Built and reg-tested on x86_64 with nvptx accelerator.

Built and reg-tested on x86_64 with nvptx accelerator with
PTX_DEFAULT_VECTOR_LENGTH set to various sizes.

IV. Patches

 1  [nvptx] Enable large vectors
 2  [nvptx] Update insufficient launch message for variable vector_length
 3  [nvptx] Enable large vectors -- test-cases
 4  [nvptx] Enable large vectors -- reduction testcases
 5  [nvptx] Don't emit barriers for empty loops -- test-cases
 6  [nvptx] Force vl32 if calling vector-partitionable routines -- 
test-cases
 7  [nvptx] Add vector_length 64 test-cases
 8  [nvptx] Enable setting vector length using -fopenacc-dim
 9  [nvptx] Enable setting vector length using -fopenacc-dim -- testcases


Tom de Vries (9):
  [nvptx] Enable large vectors
  [nvptx] Update insufficient launch message for variable vector_length
  [nvptx] Enable large vectors -- test-cases
  [nvptx] Enable large vectors -- reduction testcases
  [nvptx] Don't emit barriers for empty loops -- test-cases
  [nvptx] Force vl32 if calling vector-partitionable routines --
test-cases
  [nvptx] Add vector_length 64 test-cases
  [nvptx] Enable setting vector length using -fopenacc-dim
  [nvptx] Enable setting vector length using -fopenacc-dim -- testcases

 

[PATCH 1/9] [nvptx] Enable large vectors

2019-01-12 Thread Tom de Vries
Allow vector_length clauses to accept values larger than warp size.  Note that
this does not enable setting vector_length to values larger than warp size using
-fopenacc-dim.

2018-12-17  Tom de Vries  

* config/nvptx/nvptx.c (nvptx_goacc_validate_dims): Take larger vector
lengths into account.

* testsuite/libgomp.oacc-c-c++-common/vector-length-128-1.c: Expect
vector length to be 128.
* testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Expect vector
length 2097152 to be reduced to 1024 instead of 32.
---
 gcc/config/nvptx/nvptx.c  | 2 +-
 libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c   | 4 ++--
 libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-1.c | 5 ++---
 3 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index 1d9704543d9..8d2740cd50f 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -96,7 +96,7 @@
 #define PTX_NUM_PER_WORKER_BARRIERS (PTX_CTA_NUM_BARRIERS - 
PTX_NUM_PER_CTA_BARRIERS)
 
 #define PTX_DEFAULT_VECTOR_LENGTH PTX_WARP_SIZE
-#define PTX_MAX_VECTOR_LENGTH PTX_WARP_SIZE
+#define PTX_MAX_VECTOR_LENGTH PTX_CTA_SIZE
 #define PTX_WORKER_LENGTH 32
 #define PTX_DEFAULT_RUNTIME_DIM 0 /* Defer to runtime.  */
 
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
index 4a9854662cc..d7cd0461b53 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
@@ -350,7 +350,7 @@ int main ()
 int gangs_min, gangs_max, workers_min, workers_max, vectors_min, 
vectors_max;
 gangs_min = workers_min = vectors_min = INT_MAX;
 gangs_max = workers_max = vectors_max = INT_MIN;
-#pragma acc parallel copy (vectors_actual) /* { dg-warning "using 
vector_length \\(32\\), ignoring 2097152" "" { target 
openacc_nvidia_accel_configured } } */ \
+#pragma acc parallel copy (vectors_actual) /* { dg-warning "using 
vector_length \\(1024\\), ignoring 2097152" "" { target 
openacc_nvidia_accel_configured } } */ \
   vector_length (VECTORS)
 {
   if (acc_on_device (acc_device_host))
@@ -361,7 +361,7 @@ int main ()
   else if (acc_on_device (acc_device_nvidia))
{
  /* The GCC nvptx back end enforces vector_length (32).  */
- vectors_actual = 32;
+ vectors_actual = 1024;
}
   else
__builtin_abort ();
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-1.c 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-1.c
index fab5b0d25d1..18d77cc5ecb 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-1.c
@@ -33,7 +33,6 @@ main (void)
 
   return 0;
 }
-/* { dg-prune-output "using vector_length \\(32\\), ignoring 128" } */
 
-/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 
1, 32\\)" "oaccdevlow" } } */
-/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, 
workers=1, vectors=32" } */
+/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 
1, 128\\)" "oaccdevlow" } } */
+/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, 
workers=1, vectors=128" } */
-- 
2.16.4



[PATCH 2/9] [nvptx] Update insufficient launch message for variable vector_length

2019-01-12 Thread Tom de Vries
Update message in nvptx libgomp plugin about insufficient resources to launch
kernel, to accommodate for the fact the vector_length can now be variable.

2019-01-08  Tom de Vries  

* plugin/plugin-nvptx.c (nvptx_exec): Update insufficient hardware
resources diagnostic.
---
 libgomp/plugin/plugin-nvptx.c | 18 ++
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index c80da64c422..8912660966a 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -1297,14 +1297,16 @@ nvptx_exec (void (*fn), size_t mapnum, void 
**hostaddrs, void **devaddrs,
   if (dims[GOMP_DIM_WORKER] * dims[GOMP_DIM_VECTOR]
   > targ_fn->max_threads_per_block)
 {
-  int suggest_workers
-   = targ_fn->max_threads_per_block / dims[GOMP_DIM_VECTOR];
-  GOMP_PLUGIN_fatal ("The Nvidia accelerator has insufficient resources to"
-" launch '%s' with num_workers = %d; recompile the"
-" program with 'num_workers = %d' on that offloaded"
-" region or '-fopenacc-dim=:%d'",
-targ_fn->launch->fn, dims[GOMP_DIM_WORKER],
-suggest_workers, suggest_workers);
+  const char *msg
+   = ("The Nvidia accelerator has insufficient resources to launch '%s'"
+  " with num_workers = %d and vector_length = %d"
+  "; "
+  "recompile the program with 'num_workers = x and vector_length = y'"
+  " on that offloaded region or '-fopenacc-dim=:x:y' where"
+  " x * y <= %d"
+  ".\n");
+  GOMP_PLUGIN_fatal (msg, targ_fn->launch->fn, dims[GOMP_DIM_WORKER],
+dims[GOMP_DIM_VECTOR], targ_fn->max_threads_per_block);
 }
 
   /* Check if the accelerator has sufficient barrier resources to
-- 
2.16.4



[PATCH 3/9] [nvptx] Enable large vectors -- test-cases

2019-01-12 Thread Tom de Vries
Add various test-cases with vector length 128.

2018-12-17  Tom de Vries  

* testsuite/libgomp.oacc-c-c++-common/vector-length-128-7.c: New test.
* testsuite/libgomp.oacc-c-c++-common/vector-length-128-4.c: New test.
* testsuite/libgomp.oacc-c-c++-common/vector-length-128-6.c: New test.
---
 .../vector-length-128-4.c  | 40 +
 .../vector-length-128-6.c  | 41 ++
 .../vector-length-128-7.c  | 40 +
 3 files changed, 121 insertions(+)
 create mode 100644 
libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-4.c
 create mode 100644 
libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-6.c
 create mode 100644 
libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-7.c

diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-4.c 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-4.c
new file mode 100644
index 000..e5d1df09b8a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-4.c
@@ -0,0 +1,40 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */
+/* { dg-set-target-env-var "GOMP_DEBUG" "1" } */
+
+#include 
+
+#define N 1024
+
+unsigned int a[N];
+unsigned int b[N];
+unsigned int c[N];
+unsigned int n = N;
+
+int
+main (void)
+{
+  for (unsigned int i = 0; i < n; ++i)
+{
+  a[i] = i % 3;
+  b[i] = i % 5;
+}
+
+#pragma acc parallel num_workers (2) vector_length (128) copyin (a,b) copyout 
(c)
+  {
+#pragma acc loop worker
+for (unsigned int i = 0; i < 4; i++)
+#pragma acc loop vector
+  for (unsigned int j = 0; j < n / 4; j++)
+   c[(i * N / 4) + j] = a[(i * N / 4) + j] + b[(i * N / 4) + j];
+  }
+
+  for (unsigned int i = 0; i < n; ++i)
+if (c[i] != (i % 3) + (i % 5))
+  abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 
2, 128\\)" "oaccdevlow" } } */
+/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, 
workers=2, vectors=128" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-6.c 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-6.c
new file mode 100644
index 000..a1f67622f84
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-6.c
@@ -0,0 +1,41 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-set-target-env-var "GOMP_OPENACC_DIM" ":2:" } */
+/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */
+/* { dg-set-target-env-var "GOMP_DEBUG" "1" } */
+
+#include 
+
+#define N 1024
+
+unsigned int a[N];
+unsigned int b[N];
+unsigned int c[N];
+unsigned int n = N;
+
+int
+main (void)
+{
+  for (unsigned int i = 0; i < n; ++i)
+{
+  a[i] = i % 3;
+  b[i] = i % 5;
+}
+
+#pragma acc parallel vector_length (128) copyin (a,b) copyout (c)
+  {
+#pragma acc loop worker
+for (unsigned int i = 0; i < 4; i++)
+#pragma acc loop vector
+  for (unsigned int j = 0; j < n / 4; j++)
+   c[(i * N / 4) + j] = a[(i * N / 4) + j] + b[(i * N / 4) + j];
+  }
+
+  for (unsigned int i = 0; i < n; ++i)
+if (c[i] != (i % 3) + (i % 5))
+  abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 
0, 128\\)" "oaccdevlow" } } */
+/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, 
workers=2, vectors=128" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-7.c 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-7.c
new file mode 100644
index 000..c419f6499b5
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-7.c
@@ -0,0 +1,40 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */
+/* { dg-set-target-env-var "GOMP_DEBUG" "1" } */
+
+#include 
+
+#define N 1024
+
+unsigned int a[N];
+unsigned int b[N];
+unsigned int c[N];
+unsigned int n = N;
+
+int
+main (void)
+{
+  for (unsigned int i = 0; i < n; ++i)
+{
+  a[i] = i % 3;
+  b[i] = i % 5;
+}
+
+#pragma acc parallel vector_length (128) copyin (a,b) copyout (c)
+  {
+#pragma acc loop worker
+for (unsigned int i = 0; i < 4; i++)
+#pragma acc loop vector
+  for (unsigned int j = 0; j < n / 4; j++)
+   c[(i * N / 4) + j] = a[(i * N / 4) + j] + b[(i * N / 4) + j];
+  }
+
+  for (unsigned int i = 0; i < n; ++i)
+if (c[i] != (i % 3) + (i % 5))
+  abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 
0, 128\\)" "oaccdevlow" } } */
+/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, 
workers=8, vectors=128" } */
-- 
2.16.4



Re: Add split_stack support for GNU/Hurd

2019-01-12 Thread Ian Lance Taylor
On Sat, Jan 12, 2019 at 11:18 AM Svante Signell
 wrote:
>
> In order to prepare for inclusion of gccgo to GNU/Hurd split stack support has
> been proposed, and in reality this patch has been applied to Debian since 
> gcc-6.
>
> Please commit this patch. Later on updated patches for gccgo in gcc-8 and 
> gcc-9?
> will be reviewed and committed with the aid of Ian Lance Taylor and Matthias
> Klose.

Committed to trunk.

Thanks.

Ian


[patch, fortran] Fix the rest of PR 59345

2019-01-12 Thread Thomas Koenig

Hello world,

this patch fixes the rest of the PR by making sure we do not
pack/unpack for function results which are either allocatable
or explicit shape arrays.

Regression-tested. OK for trunk?

Regards

Thomas

2019-01-12  Thomas Koenig  

PR fortran/59345
* trans-array.c (gfc_conv_array_parameter): Remove TODO.  Do not
pack/unpack results of functions which return an explicit-shaped
or allocatable array.

2019-01-12  Thomas Koenig  

PR fortran/59345
* gfortran.dg/internal_pack_17.f90: New test.
* gfortran.dg/alloc_comp_auto_array_3.f90: Adjust number of calls
to builtin_free.
Index: trans-array.c
===
--- trans-array.c	(Revision 267829)
+++ trans-array.c	(Arbeitskopie)
@@ -7740,7 +7740,6 @@ array_parameter_size (tree desc, gfc_expr *expr, t
 }
 
 /* Convert an array for passing as an actual parameter.  */
-/* TODO: Optimize passing g77 arrays.  */
 
 void
 gfc_conv_array_parameter (gfc_se * se, gfc_expr * expr, bool g77,
@@ -7866,11 +7865,23 @@ gfc_conv_array_parameter (gfc_se * se, gfc_expr *
 
   no_pack = contiguous && no_pack;
 
-  /* If we have an expression, an array temporary will be
- generated which does not need to be packed / unpacked
- if passed to an explicit-shape dummy array.  */
+  /* If we have an EXPR_OP or a function returning an explicit-shaped
+ or allocatable array, an array temporary will be generated which
+ does not need to be packed / unpacked if passed to an
+ explicit-shape dummy array.  */
 
-  no_pack = no_pack || (g77 && expr->expr_type == EXPR_OP);
+  if (g77)
+{
+  if (expr->expr_type == EXPR_OP)
+	no_pack = 1;
+  else if (expr->expr_type == EXPR_FUNCTION && expr->value.function.esym)
+	{
+	  gfc_symbol *result = expr->value.function.esym->result;
+	  if (result->attr.dimension
+	  && (result->as->type == AS_EXPLICIT || result->attr.allocatable))
+	no_pack = 1;
+	}
+}
 
   /* Array constructors are always contiguous and do not need packing.  */
   array_constructor = g77 && !this_array_result && expr->expr_type == EXPR_ARRAY;
! { dg-do compile }
! { dg-additional-options "-fdump-tree-original" }
! PR 59345 - pack/unpack was not needed here.
! Original test case by Joost VandeVondele 
SUBROUTINE S1(A)
 INTERFACE
   FUNCTION CONTIGUOUS_F1() RESULT(res)
INTEGER :: res(5)
   END FUNCTION
 END INTERFACE
 CALL S2(CONTIGUOUS_F1())
END SUBROUTINE

SUBROUTINE S3(A)
 INTERFACE
   FUNCTION CONTIGOUOS_F2() RESULT(res)
INTEGER, ALLOCATABLE :: res(:)
   END FUNCTION
 END INTERFACE
 PROCEDURE(CONTIGOUOS_F2), POINTER :: A
 CALL S2(A())
END SUBROUTINE
! { dg-final { scan-tree-dump-not "_gfortran_internal_pack" "original" } }
! { dg-final { scan-tree-dump-not "_gfortran_internal_unpack" "original" } }


Re: [PATCH, d] Add README for process contributing to dmd and phobos

2019-01-12 Thread Iain Buclaw
On Thu, 10 Jan 2019 at 18:26, Joseph Myers  wrote:
>
> On Thu, 10 Jan 2019, Iain Buclaw wrote:
>
> > Hi,
> >
> > Joseph made mention that there isn't a readme documenting where
> > changes to d/dmd, libphobos/libdruntime, and libphobos/src should go.
> >
> > I hope this clears things up.  OK for trunk?
>
> This sort of patch is clearly covered by D maintainership.
>

OK. Thought it best to wait before committing, as the content is not
meant for myself to understand.

-- 
Iain


Add split_stack support for GNU/Hurd

2019-01-12 Thread Svante Signell
Hello,

In order to prepare for inclusion of gccgo to GNU/Hurd split stack support has
been proposed, and in reality this patch has been applied to Debian since gcc-6.

Please commit this patch. Later on updated patches for gccgo in gcc-8 and gcc-9?
will be reviewed and committed with the aid of Ian Lance Taylor and Matthias
Klose.

Thanks!

gcc/config/ChangeLog

2018-10-10  Svante Signell 
  * gcc/config/i386/gnu.h: Enable split-stack support

Index: gcc-snapshot-20181019-1.1/src/gcc/config/i386/gnu.h
===
--- gcc-snapshot-20181019-1.1.orig/src/gcc/config/i386/gnu.h
+++ gcc-snapshot-20181019-1.1/src/gcc/config/i386/gnu.h
@@ -37,11 +37,14 @@ along with GCC.  If not, see 

Re: ISO_Fortran_binding patch

2019-01-12 Thread Paul Richard Thomas
Done as revision 267884.

Thanks again.

Paul

On Sat, 12 Jan 2019 at 18:29, Paul Richard Thomas
 wrote:
>
> Hi Steve,
>
> Many thanks for the heads up. I had seen similar problems with the
> second testcase and I thought that I had fixed them. I will delete
> them from the tree and will do more work to fix the problem(s).
>
> Cheers
>
> Paul
>
> On Sat, 12 Jan 2019 at 17:17, Steve Kargl
>  wrote:
> >
> > On Sat, Jan 12, 2019 at 09:10:27AM -0800, Steve Kargl wrote:
> > > On Sat, Jan 12, 2019 at 03:28:02PM +, Paul Richard Thomas wrote:
> > > > Hi Thomas,
> > > >
> > > > Committed as revision 267881. I removed the duplicate include file and
> > > > added some documentation, as suggested.
> > > >
> > > > Many thanks for all the help
> > > >
> > >
> > > Paul,
> > >
> > > I'm seeing the following failures.  Note, I have my uncommitted
> > > ENTRY patch in my tree.  I won't be able to investigate for about
> > > 30 minutes.
> > >
> > > FAIL: gfortran.dg/ISO_Fortran_binding_2.f90   -O0  execution test
> > > Running /safe/sgk/gcc/gccx/gcc/testsuite/gfortran.dg/debug/debug.exp ...
> > > Running /safe/sgk/gcc/gccx/gcc/testsuite/gfortran.dg/dg.exp ...
> > > FAIL: gfortran.dg/ISO_Fortran_binding_2.f90   -O2  execution test
> > > FAIL: gfortran.dg/ISO_Fortran_binding_2.f90   -O3 -fomit-frame-pointer 
> > > -funroll-loops -fpeel-loops -ftracer -finline-functions  execution test
> > > FAIL: gfortran.dg/ISO_Fortran_binding_2.f90   -O3 -g  execution test
> > >
> >
> > Regression testing finished faster than I thought.  Doing
> >
> > % gmake check-fortran RUNTESTFLAGS="dg.exp=ISO_Fortran_binding_2.f90"
> > ...
> > === gfortran Summary ===
> >
> > # of expected passes8
> > # of unexpected failures4
> >
> > The first failure in the gfortran.log file is
> >
> > CFI_address: subscripts[0], is out of bounds. dv->dim[0].extent = 3 
> > subscripts[0] = 3.
> > CFI_address: subscripts[0], is out of bounds. dv->dim[0].extent = 3 
> > subscripts[0] = -1.
> > CFI_address: base address of C Descriptor must not be NULL.
> > CFI_deallocate: Base address is already NULL.
> > CFI_deallocate: C Descriptor must describe a pointer or allocatable object.
> > CFI_allocate: Base address of C descriptor must be NULL.
> > CFI_allocate: The object of the C descriptor must be a pointer or 
> > allocatable variable.
> > CFI_establish: Rank must be between 0 and 15, 0 < rank (0 !< 16).
> > CFI_establish: If the C Descriptor represents an allocatable variable 
> > (dv->attribute = 1), its base address must be NULL (dv->base_addr = NULL).
> > CFI_establish: If base address is not NULL (base_addr != NULL), the 
> > established C descriptor is for a nonallocatable entity (attribute != 1).
> > CFI_is_contiguous: Base address of C Descriptor is already NULL.
> > CFI_is_contiguous: C Descriptor must describe an array (0 < dv->rank = 0).
> > CFI_section: Base address of source must not be NULL.
> > CFI_section: Source must describe an array (0 < source->rank, 0 !< 0).
> > CFI_section: Rank of result must be equal to the rank of source minus the 
> > number of zeros in strides (result->rank = source->rank - zero_count, 1 != 
> > 1 - 1).
> > CFI_section: Lower bounds must be within the bounds of the fortran array 
> > (source->dim[0].lower_bound <= lower_bounds[0] <= 
> > source->dim[0].lower_bound + source->dim[0].extent - 1, 0 <= -1 <= 99).
> > CFI_section: Lower bounds must be within the bounds of the fortran array 
> > (source->dim[0].lower_bound <= lower_bo
> > unds[0] <= source->dim[0].lower_bound + source->dim[0].extent - 1, 0 <= 100 
> > <= 99).
> >
> > Program received signal SIGSEGV: Segmentation fault - invalid memory 
> > reference.
> >
> > Backtrace for this error:
> > #0  0x71a2 in ???
> > #1  0x0 in ???
> >
> > The 2nd, 3rd, and 4th failures are
> >
> > CFI_address: subscripts[0], is out of bounds. dv->dim[0].extent = 3 
> > subscripts[0] = 3.
> > CFI_address: subscripts[0], is out of bounds. dv->dim[0].extent = 3 
> > subscripts[0] = -1.
> > CFI_address: base address of C Descriptor must not be NULL.
> > CFI_deallocate: Base address is already NULL.
> >
> > Program received signal SIGFPE: Floating-point exception - erroneous 
> > arithmetic operation.
> >
> > Backtrace for this error:
> > #0  0x71a2 in ???
> > #1  0x400eed in ???
> > #2  0x4021ea in _start
> > at /usr/src/lib/csu/amd64/crt1.c:76
> > #3  0x200628fff in ???
> >
> >
> > --
> > Steve
>
>
>
> --
> "If you can't explain it simply, you don't understand it well enough"
> - Albert Einstein



-- 
"If you can't explain it simply, you don't understand it well enough"
- Albert Einstein


Set inline-unit-growth to 40

2019-01-12 Thread Jan Hubicka
Hello,
this patch sets inline-unit-growth to 40.  The performance changes are
- Firefox, LTO
  
https://treeherder.mozilla.org/perf.html#/compare?originalProject=try=f7bd026e1a931b9a284d1c85c2577a72dd592820=try=74889968abcc688b8d161863566ed273c0401ee4=1=opt=1=1
  After fixes to inlining priorities this makes difference without
  profile feedback only.

  Code size growth is about 9.15% with LTO and 3.95% with LTO and profile
  feedback.
- Firefox noLTO
  
https://treeherder.mozilla.org/perf.html#/compare?originalProject=try=c902b72340a3dca3114f58578c1c8f3e6a1cd89c=try=4974da6f92c144a9c09765b56a564a640069ddb9=1=1=1
  With about 7% code size growth
- SPEC
  
https://lnt.opensuse.org/db_default/v4/CPP/latest_runs_report?num_runs=10_percentage_change=0.02=46e2bd1143b5c60af814916d7673879b34ceb3f6%2Cc0d79cfe9c4ec30823480f2f9b256600e8e3899f
- C++ benchmarks
  
https://lnt.opensuse.org/db_default/v4/SPEC/latest_runs_report?num_runs=10_changes=on_percentage_change=0.02=46e2bd1143b5c60af814916d7673879b34ceb3f6%2Cc0d79cfe9c4ec30823480f2f9b256600e8e3899f

I am not entirely happy about the code-size/performance tradeoffs, but this
is a concern only for programs built with -O3 or having too many inline
keywords.  I have looked into inlining decisions for Firefox, HHVM and
Clang, and the inliner runs out of growth bounds way too early; some of the
more performance-aware projects already set the limit higher.

I will tune other metrics down to handle some of the code size problems.

Honza

Index: ChangeLog
===
--- ChangeLog   (revision 267882)
+++ ChangeLog   (working copy)
@@ -1,3 +1,7 @@
+2019-01-05  Jan Hubicka  
+
+   * params.def (inline-unit-growth): Set to 40.
+
 2019-01-12  Jakub Jelinek  
 
* tree-ssa-loop-ivopts.c (find_inv_vars): Fix a comment typo.
Index: params.def
===
--- params.def  (revision 267882)
+++ params.def  (working copy)
@@ -227,7 +227,7 @@ DEFPARAM(PARAM_LARGE_UNIT_INSNS,
 DEFPARAM(PARAM_INLINE_UNIT_GROWTH,
 "inline-unit-growth",
 "How much can given compilation unit grow because of the inlining (in 
percent).",
-20, 0, 0)
+40, 0, 0)
 DEFPARAM(PARAM_IPCP_UNIT_GROWTH,
 "ipcp-unit-growth",
 "How much can given compilation unit grow because of the 
interprocedural constant propagation (in percent).",


Re: ISO_Fortran_binding patch

2019-01-12 Thread Paul Richard Thomas
Hi Steve,

Many thanks for the heads up. I had seen similar problems with the
second testcase and I thought that I had fixed them. I will delete
them from the tree and will do more work to fix the problem(s).

Cheers

Paul

On Sat, 12 Jan 2019 at 17:17, Steve Kargl
 wrote:
>
> On Sat, Jan 12, 2019 at 09:10:27AM -0800, Steve Kargl wrote:
> > On Sat, Jan 12, 2019 at 03:28:02PM +, Paul Richard Thomas wrote:
> > > Hi Thomas,
> > >
> > > Committed as revision 267881. I removed the duplicate include file and
> > > added some documentation, as suggested.
> > >
> > > Many thanks for all the help
> > >
> >
> > Paul,
> >
> > I'm seeing the following failures.  Note, I have my uncommitted
> > ENTRY patch in my tree.  I won't be able to investigate for about
> > 30 minutes.
> >
> > FAIL: gfortran.dg/ISO_Fortran_binding_2.f90   -O0  execution test
> > Running /safe/sgk/gcc/gccx/gcc/testsuite/gfortran.dg/debug/debug.exp ...
> > Running /safe/sgk/gcc/gccx/gcc/testsuite/gfortran.dg/dg.exp ...
> > FAIL: gfortran.dg/ISO_Fortran_binding_2.f90   -O2  execution test
> > FAIL: gfortran.dg/ISO_Fortran_binding_2.f90   -O3 -fomit-frame-pointer 
> > -funroll-loops -fpeel-loops -ftracer -finline-functions  execution test
> > FAIL: gfortran.dg/ISO_Fortran_binding_2.f90   -O3 -g  execution test
> >
>
> Regression testing finished faster than I thought.  Doing
>
> % gmake check-fortran RUNTESTFLAGS="dg.exp=ISO_Fortran_binding_2.f90"
> ...
> === gfortran Summary ===
>
> # of expected passes8
> # of unexpected failures4
>
> The first failure in the gfortran.log file is
>
> CFI_address: subscripts[0], is out of bounds. dv->dim[0].extent = 3 
> subscripts[0] = 3.
> CFI_address: subscripts[0], is out of bounds. dv->dim[0].extent = 3 
> subscripts[0] = -1.
> CFI_address: base address of C Descriptor must not be NULL.
> CFI_deallocate: Base address is already NULL.
> CFI_deallocate: C Descriptor must describe a pointer or allocatable object.
> CFI_allocate: Base address of C descriptor must be NULL.
> CFI_allocate: The object of the C descriptor must be a pointer or allocatable 
> variable.
> CFI_establish: Rank must be between 0 and 15, 0 < rank (0 !< 16).
> CFI_establish: If the C Descriptor represents an allocatable variable 
> (dv->attribute = 1), its base address must be NULL (dv->base_addr = NULL).
> CFI_establish: If base address is not NULL (base_addr != NULL), the 
> established C descriptor is for a nonallocatable entity (attribute != 1).
> CFI_is_contiguous: Base address of C Descriptor is already NULL.
> CFI_is_contiguous: C Descriptor must describe an array (0 < dv->rank = 0).
> CFI_section: Base address of source must not be NULL.
> CFI_section: Source must describe an array (0 < source->rank, 0 !< 0).
> CFI_section: Rank of result must be equal to the rank of source minus the 
> number of zeros in strides (result->rank = source->rank - zero_count, 1 != 1 
> - 1).
> CFI_section: Lower bounds must be within the bounds of the fortran array 
> (source->dim[0].lower_bound <= lower_bounds[0] <= source->dim[0].lower_bound 
> + source->dim[0].extent - 1, 0 <= -1 <= 99).
> CFI_section: Lower bounds must be within the bounds of the fortran array 
> (source->dim[0].lower_bound <= lower_bo
> unds[0] <= source->dim[0].lower_bound + source->dim[0].extent - 1, 0 <= 100 
> <= 99).
>
> Program received signal SIGSEGV: Segmentation fault - invalid memory 
> reference.
>
> Backtrace for this error:
> #0  0x71a2 in ???
> #1  0x0 in ???
>
> The 2nd, 3rd, and 4th failures are
>
> CFI_address: subscripts[0], is out of bounds. dv->dim[0].extent = 3 
> subscripts[0] = 3.
> CFI_address: subscripts[0], is out of bounds. dv->dim[0].extent = 3 
> subscripts[0] = -1.
> CFI_address: base address of C Descriptor must not be NULL.
> CFI_deallocate: Base address is already NULL.
>
> Program received signal SIGFPE: Floating-point exception - erroneous 
> arithmetic operation.
>
> Backtrace for this error:
> #0  0x71a2 in ???
> #1  0x400eed in ???
> #2  0x4021ea in _start
> at /usr/src/lib/csu/amd64/crt1.c:76
> #3  0x200628fff in ???
>
>
> --
> Steve



-- 
"If you can't explain it simply, you don't understand it well enough"
- Albert Einstein


Re: [wwwdocs] Add __cpp_* feature macros to C++20 entries + other changes that have those in projects/cxx_status.html

2019-01-12 Thread Jakub Jelinek
On Sat, Jan 12, 2019 at 04:03:57PM +0100, Gerald Pfeifer wrote:
> On Sat, 12 Jan 2019, Jakub Jelinek wrote:
> > Ok for wwwdocs (or do you suggest something different for the P0941R2
> > imlementation status)?
> 
> I think that was a question to Jason and Jonathan wrt contents?

Yeah.

> Markup-wise this looks fine, and adding those links looks like a 
> good idea.

Thanks.

Jakub


Re: ISO_Fortran_binding patch

2019-01-12 Thread Steve Kargl
On Sat, Jan 12, 2019 at 09:10:27AM -0800, Steve Kargl wrote:
> On Sat, Jan 12, 2019 at 03:28:02PM +, Paul Richard Thomas wrote:
> > Hi Thomas,
> > 
> > Committed as revision 267881. I removed the duplicate include file and
> > added some documentation, as suggested.
> > 
> > Many thanks for all the help
> > 
> 
> Paul,
> 
> I'm seeing the following failures.  Note, I have my uncommitted
> ENTRY patch in my tree.  I won't be able to investigate for about
> 30 minutes.
> 
> FAIL: gfortran.dg/ISO_Fortran_binding_2.f90   -O0  execution test
> Running /safe/sgk/gcc/gccx/gcc/testsuite/gfortran.dg/debug/debug.exp ...
> Running /safe/sgk/gcc/gccx/gcc/testsuite/gfortran.dg/dg.exp ...
> FAIL: gfortran.dg/ISO_Fortran_binding_2.f90   -O2  execution test
> FAIL: gfortran.dg/ISO_Fortran_binding_2.f90   -O3 -fomit-frame-pointer 
> -funroll-loops -fpeel-loops -ftracer -finline-functions  execution test
> FAIL: gfortran.dg/ISO_Fortran_binding_2.f90   -O3 -g  execution test
> 

Regression testing finished faster than I thought.  Doing

% gmake check-fortran RUNTESTFLAGS="dg.exp=ISO_Fortran_binding_2.f90"
...
=== gfortran Summary ===

# of expected passes8
# of unexpected failures4

The first failure in the gfortran.log file is

CFI_address: subscripts[0], is out of bounds. dv->dim[0].extent = 3 
subscripts[0] = 3.
CFI_address: subscripts[0], is out of bounds. dv->dim[0].extent = 3 
subscripts[0] = -1.
CFI_address: base address of C Descriptor must not be NULL.
CFI_deallocate: Base address is already NULL.
CFI_deallocate: C Descriptor must describe a pointer or allocatable object.
CFI_allocate: Base address of C descriptor must be NULL.
CFI_allocate: The object of the C descriptor must be a pointer or allocatable 
variable.
CFI_establish: Rank must be between 0 and 15, 0 < rank (0 !< 16).
CFI_establish: If the C Descriptor represents an allocatable variable 
(dv->attribute = 1), its base address must be NULL (dv->base_addr = NULL).
CFI_establish: If base address is not NULL (base_addr != NULL), the established 
C descriptor is for a nonallocatable entity (attribute != 1).
CFI_is_contiguous: Base address of C Descriptor is already NULL.
CFI_is_contiguous: C Descriptor must describe an array (0 < dv->rank = 0).
CFI_section: Base address of source must not be NULL.
CFI_section: Source must describe an array (0 < source->rank, 0 !< 0).
CFI_section: Rank of result must be equal to the rank of source minus the 
number of zeros in strides (result->rank = source->rank - zero_count, 1 != 1 - 
1).
CFI_section: Lower bounds must be within the bounds of the fortran array 
(source->dim[0].lower_bound <= lower_bounds[0] <= source->dim[0].lower_bound + 
source->dim[0].extent - 1, 0 <= -1 <= 99).
CFI_section: Lower bounds must be within the bounds of the fortran array 
(source->dim[0].lower_bound <= lower_bo
unds[0] <= source->dim[0].lower_bound + source->dim[0].extent - 1, 0 <= 100 <= 
99).

Program received signal SIGSEGV: Segmentation fault - invalid memory reference.

Backtrace for this error:
#0  0x71a2 in ???
#1  0x0 in ???

The 2nd, 3rd, and 4th failures are

CFI_address: subscripts[0], is out of bounds. dv->dim[0].extent = 3 
subscripts[0] = 3.
CFI_address: subscripts[0], is out of bounds. dv->dim[0].extent = 3 
subscripts[0] = -1.
CFI_address: base address of C Descriptor must not be NULL.
CFI_deallocate: Base address is already NULL.

Program received signal SIGFPE: Floating-point exception - erroneous arithmetic 
operation.

Backtrace for this error:
#0  0x71a2 in ???
#1  0x400eed in ???
#2  0x4021ea in _start
at /usr/src/lib/csu/amd64/crt1.c:76
#3  0x200628fff in ???


-- 
Steve


Re: ISO_Fortran_binding patch

2019-01-12 Thread Steve Kargl
On Sat, Jan 12, 2019 at 03:28:02PM +, Paul Richard Thomas wrote:
> Hi Thomas,
> 
> Committed as revision 267881. I removed the duplicate include file and
> added some documentation, as suggested.
> 
> Many thanks for all the help
> 

Paul,

I'm seeing the following failures.  Note, I have my uncommitted
ENTRY patch in my tree.  I won't be able to investigate for about
30 minutes.

FAIL: gfortran.dg/ISO_Fortran_binding_2.f90   -O0  execution test
Running /safe/sgk/gcc/gccx/gcc/testsuite/gfortran.dg/debug/debug.exp ...
Running /safe/sgk/gcc/gccx/gcc/testsuite/gfortran.dg/dg.exp ...
FAIL: gfortran.dg/ISO_Fortran_binding_2.f90   -O2  execution test
FAIL: gfortran.dg/ISO_Fortran_binding_2.f90   -O3 -fomit-frame-pointer 
-funroll-loops -fpeel-loops -ftracer -finline-functions  execution test
FAIL: gfortran.dg/ISO_Fortran_binding_2.f90   -O3 -g  execution test


-- 
Steve


[committed] Fix 2 comment typos

2019-01-12 Thread Jakub Jelinek
Hi!

While is't is a contraction of "is it", it doesn't make sense in either of
these spots and I believe isn't was meant there instead.

Committed as obvious to trunk.

2019-01-12  Jakub Jelinek  

* tree-ssa-loop-ivopts.c (find_inv_vars): Fix a comment typo.

* c-typeck.c (convert_for_assignment): Fix a comment typo.

--- gcc/tree-ssa-loop-ivopts.c.jj   2019-01-10 11:43:08.936467241 +0100
+++ gcc/tree-ssa-loop-ivopts.c  2019-01-12 16:48:09.834963620 +0100
@@ -3037,7 +3037,7 @@ find_inv_vars (struct ivopts_data *data,
It's hard to make decision whether constant part should be stripped
or not.  We choose to not strip based on below facts:
  1) We need to count ADD cost for constant part if it's stripped,
-   which is't always trivial where this functions is called.
+   which isn't always trivial where this functions is called.
  2) Stripping constant away may be conflict with following loop
invariant hoisting pass.
  3) Not stripping constant away results in more invariant exprs,
--- gcc/c/c-typeck.c.jj 2019-01-01 12:37:48.607458942 +0100
+++ gcc/c/c-typeck.c2019-01-12 17:18:52.727211448 +0100
@@ -7283,7 +7283,7 @@ convert_for_assignment (location_t locat
}
}
 
-  /* If RHS is't an address, check pointer or array of packed
+  /* If RHS isn't an address, check pointer or array of packed
 struct or union.  */
   warn_for_address_or_pointer_of_packed_member
(TREE_CODE (orig_rhs) != ADDR_EXPR, type, orig_rhs);

Jakub


[PATCH, testsuite] Skip new charset tests on Darwin8-10.

2019-01-12 Thread Iain Sandoe
Hi,

These earlier Darwin versions have “FP_≈” inside a comment in 
architecture/{ppc,i386}/math.h, which is included by math.h which causes the 
tests to fail.

The intent of the tests (i.e. to ensure that the library itself does not emit 
non-ascii) is covered by other platforms, including later Darwin editions.  
AFAICT, this issue was fixed from Darwin11 onwards (although I have not tested 
every edition / looked for other possible non-ascii cases, in other headers).

Since there’s no expectation that the headers would ever be updated, and it 
doesn’t seem worth applying fixincludes for this, let’s skip the tests on 
versions with the issue.

Tested on powerpc-darwin9, x86_64-darwin10 and x86_64-darwin18.

OK for trunk?
Iain

libstdc++v3/testsuite/

* 17_intro/headers/c++1998/charset.cc: Skip for Darwin8 to Darwin10.
* 17_intro/headers/c++2011/charset.cc: Likewise.
* 17_intro/headers/c++2014/charset.cc: Likewise.
* 17_intro/headers/c++2017/charset.cc: Likewise.
* 17_intro/headers/c++2020/charset.cc: Likewise.


diff --git a/libstdc++-v3/testsuite/17_intro/headers/c++1998/charset.cc 
b/libstdc++-v3/testsuite/17_intro/headers/c++1998/charset.cc
index 864c64e..4425e1c 100644
--- a/libstdc++-v3/testsuite/17_intro/headers/c++1998/charset.cc
+++ b/libstdc++-v3/testsuite/17_intro/headers/c++1998/charset.cc
@@ -1,4 +1,5 @@
 // { dg-options "-finput-charset=ascii" }
 // { dg-do compile }
+// { dg-skip-if "non-ascii in system headers" { *-*-darwin10*  *-*-darwin[89]* 
} }
 
 #include 
diff --git a/libstdc++-v3/testsuite/17_intro/headers/c++2011/charset.cc 
b/libstdc++-v3/testsuite/17_intro/headers/c++2011/charset.cc
index 864c64e..4425e1c 100644
--- a/libstdc++-v3/testsuite/17_intro/headers/c++2011/charset.cc
+++ b/libstdc++-v3/testsuite/17_intro/headers/c++2011/charset.cc
@@ -1,4 +1,5 @@
 // { dg-options "-finput-charset=ascii" }
 // { dg-do compile }
+// { dg-skip-if "non-ascii in system headers" { *-*-darwin10*  *-*-darwin[89]* 
} }
 
 #include 
diff --git a/libstdc++-v3/testsuite/17_intro/headers/c++2014/charset.cc 
b/libstdc++-v3/testsuite/17_intro/headers/c++2014/charset.cc
index 864c64e..4425e1c 100644
--- a/libstdc++-v3/testsuite/17_intro/headers/c++2014/charset.cc
+++ b/libstdc++-v3/testsuite/17_intro/headers/c++2014/charset.cc
@@ -1,4 +1,5 @@
 // { dg-options "-finput-charset=ascii" }
 // { dg-do compile }
+// { dg-skip-if "non-ascii in system headers" { *-*-darwin10*  *-*-darwin[89]* 
} }
 
 #include 
diff --git a/libstdc++-v3/testsuite/17_intro/headers/c++2017/charset.cc 
b/libstdc++-v3/testsuite/17_intro/headers/c++2017/charset.cc
index 864c64e..4425e1c 100644
--- a/libstdc++-v3/testsuite/17_intro/headers/c++2017/charset.cc
+++ b/libstdc++-v3/testsuite/17_intro/headers/c++2017/charset.cc
@@ -1,4 +1,5 @@
 // { dg-options "-finput-charset=ascii" }
 // { dg-do compile }
+// { dg-skip-if "non-ascii in system headers" { *-*-darwin10*  *-*-darwin[89]* 
} }
 
 #include 
diff --git a/libstdc++-v3/testsuite/17_intro/headers/c++2020/charset.cc 
b/libstdc++-v3/testsuite/17_intro/headers/c++2020/charset.cc
index 864c64e..4425e1c 100644
--- a/libstdc++-v3/testsuite/17_intro/headers/c++2020/charset.cc
+++ b/libstdc++-v3/testsuite/17_intro/headers/c++2020/charset.cc
@@ -1,4 +1,5 @@
 // { dg-options "-finput-charset=ascii" }
 // { dg-do compile }
+// { dg-skip-if "non-ascii in system headers" { *-*-darwin10*  *-*-darwin[89]* 
} }
 
 #include 



[wwwdocs] svnwrite.html - reduce references to SVN

2019-01-12 Thread Gerald Pfeifer
When migrating from CVS to SVN I already reduced direct references
to CVS/SVN, and expecting a migration to GIT in this decade let me
take the opportunity to generalize/simplify things a bit further.

Committed.

(Perhaps we should also start referring to the more general term
"commit" vs "check in"?)

Gerald

Index: svnwrite.html
===
RCS file: /cvs/gcc/wwwdocs/htdocs/svnwrite.html,v
retrieving revision 1.42
diff -u -r1.42 svnwrite.html
--- svnwrite.html   30 Sep 2018 14:38:47 -  1.42
+++ svnwrite.html   12 Jan 2019 16:23:09 -
@@ -142,7 +142,7 @@
 
 Free for all
 
-The following changes can be made by everyone with SVN write access:
+The following changes can be made by everyone with write access:
 
 Obvious fixes can be committed without prior approval.  Just check
 in the fix and copy it to gcc-patches.  A good test to
@@ -207,13 +207,12 @@
 when performing checkins to avoid accidental checkins of local
 code.
 
-We prefer that each SVN checkin be of a complete, single logical
+We prefer that each checkin be of a complete, single logical
 change, which may affect multiple files.  The log message for that
 checkin should be the complete ChangeLog entry for the change.  This
 makes it easier to correlate changes across files, and minimizes the
 time the repository is inconsistent.  If you have several unrelated
-changes, you should check them in with separate SVN commit
-commands.
+changes, you should check them in separately.
 
 
 Sync your sources with the master repository via "svn


Re: ISO_Fortran_binding patch

2019-01-12 Thread Paul Richard Thomas
Hi Thomas,

Committed as revision 267881. I removed the duplicate include file and
added some documentation, as suggested.

Many thanks for all the help

Paul

On Tue, 8 Jan 2019 at 23:19, Thomas Koenig  wrote:
>
> Hi Paul,
>
> > This is an updated version of the earlier patch. The main addition is
> > a second testcase that checks the errors emitted by the CFI API
> > functions.
>
> I notice that the header file ISO_Fortran_binding.h is found twice
> in the patch.
>
> Is there any particular reason why you do not want to use
>
> ! { dg-additional-options "-I $srcdir/../../libgfortran" }
>
> in the test cases, and have it only once in the source trees?
>
> However, I have no real strong opinion on this matter, if you
> want to keep it as submitted, it is also fine.
>
> Therefore: OK for trunk, and thanks a lot for the patch!
>
> Documentation we can add at a later date, I think.
>
> Regards
>
> Thomas



-- 
"If you can't explain it simply, you don't understand it well enough"
- Albert Einstein


Re: [wwwdocs] Add __cpp_* feature macros to C++20 entries + other changes that have those in projects/cxx_status.html

2019-01-12 Thread Gerald Pfeifer
On Sat, 12 Jan 2019, Jakub Jelinek wrote:
> Ok for wwwdocs (or do you suggest something different for the P0941R2
> implementation status)?

I think that was a question to Jason and Jonathan wrt contents?

Markup-wise this looks fine, and adding those links looks like a 
good idea.

Gerald


[C++ PATCH] Add __cpp_guaranteed_copy_elision and __cpp_nontype_template_parameter_auto

2019-01-12 Thread Jakub Jelinek
Hi!

So, from what I can understand, __cpp_guaranteed_copy_elision
is a C++17 P0135R1 feature test macro for a feature we claim to support,
and __cpp_nontype_template_parameter_auto is a new name for the
__cpp_template_auto macro (which doesn't appear anymore in the SD-6 lists,
but clang++ keeps it for backwards compatibility too).

Tested on x86_64-linux, ok for trunk?

2019-01-12  Jakub Jelinek  

* c-cppbuiltin.c (c_cpp_builtins): Define __cpp_guaranteed_copy_elision
and __cpp_nontype_template_parameter_auto.  Add a comment that
__cpp_template_auto is deprecated.

* g++.dg/cpp1z/feat-cxx1z.C: Add tests for
__cpp_guaranteed_copy_elision and __cpp_nontype_template_parameter_auto
feature test macros.
* g++.dg/cpp2a/feat-cxx2a.C: Likewise.

--- gcc/c-family/c-cppbuiltin.c.jj  2019-01-01 12:37:51.340414101 +0100
+++ gcc/c-family/c-cppbuiltin.c 2019-01-12 14:20:12.792148907 +0100
@@ -971,9 +971,13 @@ c_cpp_builtins (cpp_reader *pfile)
  cpp_define (pfile, "__cpp_aggregate_bases=201603");
  cpp_define (pfile, "__cpp_deduction_guides=201703");
  cpp_define (pfile, "__cpp_noexcept_function_type=201510");
+ /* Old macro, superseded by
+__cpp_nontype_template_parameter_auto.  */
  cpp_define (pfile, "__cpp_template_auto=201606");
  cpp_define (pfile, "__cpp_structured_bindings=201606");
  cpp_define (pfile, "__cpp_variadic_using=201611");
+ cpp_define (pfile, "__cpp_guaranteed_copy_elision=201606");
+ cpp_define (pfile, "__cpp_nontype_template_parameter_auto=201606");
}
   if (cxx_dialect > cxx17)
{
--- gcc/testsuite/g++.dg/cpp1z/feat-cxx1z.C.jj  2018-10-22 09:28:06.387657035 
+0200
+++ gcc/testsuite/g++.dg/cpp1z/feat-cxx1z.C 2019-01-12 14:27:15.014332003 
+0100
@@ -417,6 +417,18 @@
 #  error "__cpp_variadic_using != 201611"
 #endif
 
+#ifndef __cpp_guaranteed_copy_elision
+#  error "__cpp_guaranteed_copy_elision"
+#elif __cpp_guaranteed_copy_elision != 201606
+#  error "__cpp_guaranteed_copy_elision != 201606"
+#endif
+
+#ifndef __cpp_nontype_template_parameter_auto
+#  error "__cpp_nontype_template_parameter_auto"
+#elif __cpp_nontype_template_parameter_auto != 201606
+#  error "__cpp_nontype_template_parameter_auto != 201606"
+#endif
+
 #ifdef __has_cpp_attribute
 
 #  if ! __has_cpp_attribute(maybe_unused)
--- gcc/testsuite/g++.dg/cpp2a/feat-cxx2a.C.jj  2018-11-17 00:16:41.302392263 
+0100
+++ gcc/testsuite/g++.dg/cpp2a/feat-cxx2a.C 2019-01-12 14:29:03.283584283 
+0100
@@ -416,6 +416,18 @@
 #  error "__cpp_variadic_using != 201611"
 #endif
 
+#ifndef __cpp_guaranteed_copy_elision
+#  error "__cpp_guaranteed_copy_elision"
+#elif __cpp_guaranteed_copy_elision != 201606
+#  error "__cpp_guaranteed_copy_elision != 201606"
+#endif
+
+#ifndef __cpp_nontype_template_parameter_auto
+#  error "__cpp_nontype_template_parameter_auto"
+#elif __cpp_nontype_template_parameter_auto != 201606
+#  error "__cpp_nontype_template_parameter_auto != 201606"
+#endif
+
 // C++20 features
 
 #if __cpp_conditional_explicit != 201806

Jakub


[PATCH, powerpc] Fix speculation barrier and group nop to emit target register names.

2019-01-12 Thread Iain Sandoe
Hi,

The current implementation of “speculation_barrier” and “group_end_nop” insns 
emit hard-wired register names which causes tests using them to fail on Darwin, 
at least, which uses “rNN” instead of “NN”.

The patch makes the register names for these insns use the operand output 
mechanism to substitute the appropriate variant when needed.

tested on powerpc-darwin9 and powerpc64-linux.

OK for trunk?
Iain

gcc/

* config/rs6000/rs6000.md (group_end_nop): Emit
insn register names using operand format, rather than
hard-wired.  (speculation_barrier): Likewise.


diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 56364e0..86badc2 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -12494,15 +12494,18 @@
   [(unspec [(const_int 0)] UNSPEC_GRP_END_NOP)]
   ""
 {
-  if (rs6000_tune == PROCESSOR_POWER6)
-return "ori 1,1,0";
-  return "ori 2,2,0";
+  operands[0] = gen_rtx_REG (Pmode,
+rs6000_tune == PROCESSOR_POWER6 ? 1 : 2);
+  return "ori %0,%0,0";
 })
 
 (define_insn "speculation_barrier"
   [(unspec_volatile:BLK [(const_int 0)] UNSPECV_SPEC_BARRIER)]
   ""
-  "ori 31,31,0")
+{
+  operands[0] = gen_rtx_REG (Pmode, 31);
+  return "ori %0,%0,0";
+})
 
 ;; Define the subtract-one-and-jump insns, starting with the template
 ;; so loop.c knows what to generate.



[wwwdocs] Add __cpp_* feature macros to C++20 entries + other changes that have those in projects/cxx_status.html

2019-01-12 Thread Jakub Jelinek
Hi!

On Fri, Jan 11, 2019 at 04:54:11PM +0100, Jakub Jelinek wrote:
> I've noticed we don't have any feature test macros in the table for C++20,
> even when a couple of the features have them defined.

Here is an updated patch, that in addition to that makes 9 URLs as we now
have #cxx in gcc-9/changes.html and adds missing P0941R2 entry that clang
table has.  For that one I'm not 100% sure what to say, I've copied all the
macros from http://wg21.link/p0941r2 into two source files (attached below),
one for core language features, another one for library and tested those
with -std=c++2a with current trunk.  Compared to what the paper lists, we
have __has_cpp_attribute (carries_dependency) 0, __cpp_guaranteed_copy_elision
and __cpp_nontype_template_parameter_auto not defined.  Is that what we
want?  On the library side, __cpp_lib_any, __cpp_lib_execution,
__cpp_lib_hardware_interference_size, __cpp_lib_null_iterators,
__cpp_lib_parallel_algorithm, __cpp_lib_raw_memory_algorithms,
__cpp_lib_to_chars, __cpp_lib_uncaught_exceptions, __cpp_lib_variant
macros aren't defined (at least not in ) and
__cpp_lib_optional, __cpp_lib_shared_ptr_arrays, __cpp_lib_string_view
have smaller values than those in the P0941R2.
Is that the desirable state given current C++2A implementation status?

Ok for wwwdocs (or do you suggest something different for the P0941R2
implementation status)?

--- htdocs/projects/cxx-status.html.jj  2019-01-12 12:27:22.966732519 +0100
+++ htdocs/projects/cxx-status.html 2019-01-12 13:57:47.457900425 +0100
@@ -115,7 +115,7 @@
 
Range-based for statements with initializer 
   http://wg21.link/p0614r1;>P0614R1 
-   9 
+   9 

 
 
@@ -127,28 +127,28 @@
 
ADL and function templates that are not visible 
   http://wg21.link/p0846r0;>P0846R0 
-   9 
+   9 

 
 
const mismatch with defaulted copy constructor 
   http://wg21.link/p0641r2;>P0641R2 
-   9 
+   9 

 
 
Less eager instantiation of constexpr functions 
   http://wg21.link/p0859r0;>P0859R0
-   5.2 (mostly)9 (P0859R0) 
+   5.2 (mostly) 9 (P0859R0) 

 
 
Consistent comparison (operator=) 
   http://wg21.link/p0515r3;>P0515R3
-   http://wg21.link/P0905r1;>P0905R1
+   http://wg21.link/p0905r1;>P0905R1
http://wg21.link/p1120r0;>P1120R0
No 
-   
+   __cpp_impl_three_way_comparison = 201711 
 
 
Access checking on specializations 
@@ -159,19 +159,19 @@
 
Default constructible and assignable stateless lambdas 
   http://wg21.link/p0624r2;>P0624R2
-   9 
+   9 

 
 
Lambdas in unevaluated contexts 
   http://wg21.link/p0315r4;>P0315R4
-   9 
+   9 

 
 
Language support for empty objects 
   http://wg21.link/p0840r2;>P0840R2
-   9 
+   9 

 
 
@@ -195,32 +195,32 @@
 
Down with typename! 
   http://wg21.link/p0634r3;>P0634R3
-   9 
+   9 

 
 
Allow pack expansion in lambda init-capture 
   http://wg21.link/p0780r2;>P0780R2
-   9 
+   9 

 
 
Proposed wording for likely and unlikely attributes 
   http://wg21.link/p0479r5;>P0479R5
-   9 
+   9 

 
 
Deprecate implicit capture of this via [=] 
   http://wg21.link/p0806r2;>P0806R2
-   9 
+   9 

 
 
Class Types in Non-Type Template Parameters 
   http://wg21.link/p0732r2;>P0732R2
-   9 
-   
+   9 
+   __cpp_nontype_template_parameter_class = 201806 
 
 
Atomic Compare-and-Exchange with Padding Bits 
@@ -231,19 +231,19 @@
 
Efficient sized delete for variable sized classes 
   http://wg21.link/p0722r3;>P0722R3
-   9 
-   
+   9 
+   __cpp_impl_destroying_delete = 201806 
 
 
Allowing Virtual Function Calls in Constant Expressions 
   http://wg21.link/p1064r0;>P1064R0
-   9 
+   9 

 
 
Prohibit aggregates with user-declared constructors 
   http://wg21.link/p1008r1;>P1008R1
-   9 
+   9 

 
 
@@ -256,20 +256,20 @@
 
explicit(bool) 
   http://wg21.link/p0892r2;>P0892R2
-   9 
-   
+   9 
+   __cpp_conditional_explicit = 201806 
 
 
Signed integers are two's complement 
   http://wg21.link/p1236r1;>P1236R1
-   9 
+   9 

 
 
char8_t 
   http://wg21.link/p0482r6;>P0482R6
No 
-   
+   __cpp_char8_t = 201811 
 
 
Immediate functions (consteval) 
@@ -280,23 +280,29 @@
 
std::is_constant_evaluated 
   http://wg21.link/p0595r2;>P0595R2
-   9 
+   9 

 
 
Nested inline namespaces 
   http://wg21.link/p1094r2;>P1094R2
-   9 

Re: [PATCH] PR fortran/61765 -- Avoid ENTRY names in check of repeditive symbols

2019-01-12 Thread Paul Richard Thomas
Hi Steve,

This is OK for trunk.

Thanks

Paul

On Sat, 12 Jan 2019 at 04:34, Steve Kargl
 wrote:
>
> The attached patch has been tested on x86_64-*-freebsd.   There
> were no regressions.  The patch is less than obvious, but simple.
> OK to commit?
>
> 2019-01-11  Steven G. Kargl  
>
> PR fortran/61765
> * resolve.c (gfc_verify_binding_labels): Break if-elseif-elseif 
> structure into independent
> if's with a return to simplify logic.  Avoid a check for ENTRY name 
> with bind(c).
>
> 2019-01-11  Steven G. Kargl  
>
> PR fortran/61765
> * gfortran.dg/pr61765.f90: New test.
>
> --
> Steve



-- 
"If you can't explain it simply, you don't understand it well enough"
- Albert Einstein


Re: [wwwdoc][Patch] Mention Loongson 3a1000 3a2000 3a3000 2k1000 support in gcc9

2019-01-12 Thread Paul Hua
ping?

On Mon, Dec 31, 2018 at 6:27 PM Paul Hua  wrote:
>
> Hi Gerald,
>
> The attached patch mentions Loongson 3a1000 3a2000 3a3000 2k1000 support in 
> gcc9.
>
> ok for commit?


[committed][nvptx] Allow default vl to be overridden in nvptx_goacc_validate_dims_1

2019-01-12 Thread Tom de Vries
Hi,

In nvptx_goacc_validate_dims_1, allow oacc_default_dims[DIM_VECTOR] to be
overridden, by assigning it to a new variable default_vector_length at the
start, and using it at the end.

Committed to trunk.

Thanks,
- Tom

[nvptx] Allow default vl to be overridden in nvptx_goacc_validate_dims_1

2019-01-11  Tom de Vries  

* config/nvptx/nvptx.c (nvptx_goacc_validate_dims_1): Add and use new
variable default_vector_length.

---
 gcc/config/nvptx/nvptx.c | 17 +
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index a4c79532a1d..95d72d0a4e0 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -5556,6 +5556,7 @@ nvptx_goacc_validate_dims_1 (tree decl, int dims[], int 
fn_level, unsigned used)
   bool offload_region_p = false;
   bool routine_p = false;
   bool routine_seq_p = false;
+  int default_vector_length = -1;
 
   if (decl == NULL_TREE)
 {
@@ -5654,6 +5655,12 @@ nvptx_goacc_validate_dims_1 (tree decl, int dims[], int 
fn_level, unsigned used)
   gcc_assert (dims[GOMP_DIM_GANG] >= -1);
 }
 
+  if (offload_region_p)
+default_vector_length = oacc_get_default_dim (GOMP_DIM_VECTOR);
+  else
+/* oacc_default_dims_p.  */
+default_vector_length = PTX_DEFAULT_VECTOR_LENGTH;
+
   int old_dims[GOMP_DIM_MAX];
   unsigned int i;
   for (i = 0; i < GOMP_DIM_MAX; ++i)
@@ -5673,12 +5680,12 @@ nvptx_goacc_validate_dims_1 (tree decl, int dims[], int 
fn_level, unsigned used)
   if (dims[GOMP_DIM_VECTOR] == 0)
 {
   vector_reason = G_("using vector_length (%d), ignoring runtime setting");
-  dims[GOMP_DIM_VECTOR] = PTX_DEFAULT_VECTOR_LENGTH;
+  dims[GOMP_DIM_VECTOR] = default_vector_length;
 }
 
   if (dims[GOMP_DIM_VECTOR] > 0
   && !nvptx_welformed_vector_length_p (dims[GOMP_DIM_VECTOR]))
-dims[GOMP_DIM_VECTOR] = PTX_DEFAULT_VECTOR_LENGTH;
+dims[GOMP_DIM_VECTOR] = default_vector_length;
 
   nvptx_apply_dim_limits (dims);
 
@@ -5696,7 +5703,7 @@ nvptx_goacc_validate_dims_1 (tree decl, int dims[], int 
fn_level, unsigned used)
 
   if (oacc_default_dims_p)
 {
-  dims[GOMP_DIM_VECTOR] = PTX_DEFAULT_VECTOR_LENGTH;
+  dims[GOMP_DIM_VECTOR] = default_vector_length;
   if (dims[GOMP_DIM_WORKER] < 0)
dims[GOMP_DIM_WORKER] = PTX_DEFAULT_RUNTIME_DIM;
   if (dims[GOMP_DIM_GANG] < 0)
@@ -5715,7 +5722,9 @@ nvptx_goacc_validate_dims_1 (tree decl, int dims[], int 
fn_level, unsigned used)
/* Function oacc_validate_dims will apply the minimal dimension.  */
continue;
 
- dims[i] = oacc_get_default_dim (i);
+ dims[i] = (i == GOMP_DIM_VECTOR
+? default_vector_length
+: oacc_get_default_dim (i));
}
 
   nvptx_apply_dim_limits (dims);


[committed][nvptx] Apply vector-partitionable routines workaround to default vl

2019-01-12 Thread Tom de Vries
Hi,

Make "[nvptx] Force vl32 if calling vector-partitionable routines" work as well
if vector length is set by modifying PTX_DEFAULT_VECTOR_LENGTH.

Committed to trunk.

Thanks,
- Tom

[nvptx] Apply vector-partitionable routines workaround to default vl

2019-01-11  Tom de Vries  

* config/nvptx/nvptx.c (nvptx_goacc_validate_dims_1): In offloading
region calling vector-partitionable routine, set default_vector_length
to WARP_SIZE.

---
 gcc/config/nvptx/nvptx.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index 95d72d0a4e0..1d9704543d9 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -5669,6 +5669,8 @@ nvptx_goacc_validate_dims_1 (tree decl, int dims[], int 
fn_level, unsigned used)
   const char *vector_reason = NULL;
   if (offload_region_p && has_vector_partitionable_routine_calls_p (decl))
 {
+  default_vector_length = PTX_WARP_SIZE;
+
   if (dims[GOMP_DIM_VECTOR] > PTX_WARP_SIZE)
{
  vector_reason = G_("using vector_length (%d) due to call to"


[committed][nvptx] Verify dimension limits after applying defaults

2019-01-12 Thread Tom de Vries
Hi,

There's a problem in oacc_validate_dims that when f.i. the worker dimension
is set using -fopenacc-dim=:32, and the vector_length is set using a
"vector_length (128)" clause, the compiler combines, accepts and emits the
values, while the combination of the two is invalid.

The reason for this is that while oacc_validate_dims validates the dimensions
using targetm.goacc.validate_dims before applying default or minimum values,
it does not do so afterwards.

Work around this in the nvptx port by applying the defaults from
oacc_default_dims at the end of nvptx_goacc_validate_dims_1, as
oacc_validate_dims would do it, and then apply the dimensions limits.

Committed to trunk.

Thanks,
- Tom

[nvptx] Verify dimension limits after applying defaults

2019-01-11  Tom de Vries  

PR middle-end/88703
* config/nvptx/nvptx.c (nvptx_goacc_validate_dims_1): Apply defaults
from oacc_default_dims, as oacc_validate_dims would do it, and apply
dimensions limits.

---
 gcc/config/nvptx/nvptx.c | 17 +
 1 file changed, 17 insertions(+)

diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index 89f0e560910..a4c79532a1d 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -5703,6 +5703,23 @@ nvptx_goacc_validate_dims_1 (tree decl, int dims[], int 
fn_level, unsigned used)
dims[GOMP_DIM_GANG] = PTX_DEFAULT_RUNTIME_DIM;
   nvptx_apply_dim_limits (dims);
 }
+
+  if (offload_region_p)
+{
+  for (i = 0; i < GOMP_DIM_MAX; i++)
+   {
+ if (!(dims[i] < 0))
+   continue;
+
+ if ((used & GOMP_DIM_MASK (i)) == 0)
+   /* Function oacc_validate_dims will apply the minimal dimension.  */
+   continue;
+
+ dims[i] = oacc_get_default_dim (i);
+   }
+
+  nvptx_apply_dim_limits (dims);
+}
 }
 
 /* Validate compute dimensions of an OpenACC offload or routine, fill


[committed][openacc] Add used parameter to TARGET_GOACC_VALIDATE_DIMS

2019-01-12 Thread Tom de Vries
Hi,

Add a used parameter to TARGET_GOACC_VALIDATE_DIMS, allowing a target to make
decisions in the hook implementation based on whether a dimension is used or
not.

Committed to trunk.

Thanks,
- Tom

[openacc] Add used parameter to TARGET_GOACC_VALIDATE_DIMS

2019-01-11  Tom de Vries  

* config/nvptx/nvptx.c (nvptx_goacc_validate_dims_1)
(nvptx_goacc_validate_dims): Add used parameter.
* doc/tm.texi: Regenerate.
* omp-offload.c (oacc_parse_default_dims, oacc_validate_dims): Add
argument to call to targetm.goacc.validate_dims.
(default_goacc_validate_dims): Add used
parameter.
* target.def (validate_dims): Add used parameter in DEFHOOK.
* targhooks.h (default_goacc_validate_dims): Add used parameter.

---
 gcc/config/nvptx/nvptx.c | 6 +++---
 gcc/doc/tm.texi  | 2 +-
 gcc/omp-offload.c| 9 +
 gcc/target.def   | 2 +-
 gcc/targhooks.h  | 2 +-
 5 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index b37010ff58e..89f0e560910 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -5549,7 +5549,7 @@ has_vector_partitionable_routine_calls_p (tree fndecl)
DIMS has changed.  */
 
 static void
-nvptx_goacc_validate_dims_1 (tree decl, int dims[], int fn_level)
+nvptx_goacc_validate_dims_1 (tree decl, int dims[], int fn_level, unsigned 
used)
 {
   bool oacc_default_dims_p = false;
   bool oacc_min_dims_p = false;
@@ -5711,7 +5711,7 @@ nvptx_goacc_validate_dims_1 (tree decl, int dims[], int 
fn_level)
DECL is null, we are validating the default dimensions.  */
 
 static bool
-nvptx_goacc_validate_dims (tree decl, int dims[], int fn_level)
+nvptx_goacc_validate_dims (tree decl, int dims[], int fn_level, unsigned used)
 {
   int old_dims[GOMP_DIM_MAX];
   unsigned int i;
@@ -5719,7 +5719,7 @@ nvptx_goacc_validate_dims (tree decl, int dims[], int 
fn_level)
   for (i = 0; i < GOMP_DIM_MAX; ++i)
 old_dims[i] = dims[i];
 
-  nvptx_goacc_validate_dims_1 (decl, dims, fn_level);
+  nvptx_goacc_validate_dims_1 (decl, dims, fn_level, used);
 
   gcc_assert (dims[GOMP_DIM_VECTOR] != 0);
   if (dims[GOMP_DIM_WORKER] > 0 && dims[GOMP_DIM_VECTOR] > 0)
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index ddde4a7287b..355fd5917af 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -6071,7 +6071,7 @@ to use it.
 Return number of threads in SIMT thread group on the target.
 @end deftypefn
 
-@deftypefn {Target Hook} bool TARGET_GOACC_VALIDATE_DIMS (tree @var{decl}, int 
*@var{dims}, int @var{fn_level})
+@deftypefn {Target Hook} bool TARGET_GOACC_VALIDATE_DIMS (tree @var{decl}, int 
*@var{dims}, int @var{fn_level}, unsigned @var{used})
 This hook should check the launch dimensions provided for an OpenACC
 compute region, or routine.  Defaulted values are represented as -1
 and non-constant values as 0.  The @var{fn_level} is negative for the
diff --git a/gcc/omp-offload.c b/gcc/omp-offload.c
index 9cac5655c63..201c459fa1f 100644
--- a/gcc/omp-offload.c
+++ b/gcc/omp-offload.c
@@ -644,8 +644,8 @@ oacc_parse_default_dims (const char *dims)
 }
 
   /* Allow the backend to validate the dimensions.  */
-  targetm.goacc.validate_dims (NULL_TREE, oacc_default_dims, -1);
-  targetm.goacc.validate_dims (NULL_TREE, oacc_min_dims, -2);
+  targetm.goacc.validate_dims (NULL_TREE, oacc_default_dims, -1, 0);
+  targetm.goacc.validate_dims (NULL_TREE, oacc_min_dims, -2, 0);
 }
 
 /* Validate and update the dimensions for offloaded FN.  ATTRS is the
@@ -673,7 +673,7 @@ oacc_validate_dims (tree fn, tree attrs, int *dims, int 
level, unsigned used)
   pos = TREE_CHAIN (pos);
 }
 
-  bool changed = targetm.goacc.validate_dims (fn, dims, level);
+  bool changed = targetm.goacc.validate_dims (fn, dims, level, used);
 
   /* Default anything left to 1 or a partitioned default.  */
   for (ix = 0; ix != GOMP_DIM_MAX; ix++)
@@ -1717,7 +1717,8 @@ execute_oacc_device_lower ()
 
 bool
 default_goacc_validate_dims (tree ARG_UNUSED (decl), int *dims,
-int ARG_UNUSED (fn_level))
+int ARG_UNUSED (fn_level),
+unsigned ARG_UNUSED (used))
 {
   bool changed = false;
 
diff --git a/gcc/target.def b/gcc/target.def
index 2aeb1ff8445..32830a1b33c 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -1686,7 +1686,7 @@ are being validated and unspecified defaults should be 
filled in.\n\
 Diagnostics should be issued as appropriate.  Return\n\
 true, if changes have been made.  You must override this hook to\n\
 provide dimensions larger than 1.",
-bool, (tree decl, int *dims, int fn_level),
+bool, (tree decl, int *dims, int fn_level, unsigned used),
 default_goacc_validate_dims)
 
 DEFHOOK
diff --git a/gcc/targhooks.h b/gcc/targhooks.h
index 3b6e404f080..fb133fd3f3f 100644
--- a/gcc/targhooks.h
+++ b/gcc/targhooks.h
@@ -121,7 +121,7 @@ extern void default_finish_cost (void *, 

[committed] Tweak m4/minloc0.m4 indentation to avoid -Wmisleading-indentation warnings (PR libfortran/88807)

2019-01-12 Thread Jakub Jelinek
Hi!

The following patch reindents parts of m4/minloc0.m4 to avoid
../../../libgfortran/generated/minloc0_4_i1.c:138:5: warning: this ‘else’ 
clause does not guard... [-Wmisleading-indentation]
etc. warnings.  The indentation of the generated sources is misleading
for many of the sources and fixing it properly would be harder - this
patch just changes it so that we don't have
  else
something;
following statements;
which -Wmisleading-indentation among other things warns about.

Bootstrapped/regtested on x86_64-linux and i686-linux, preapproved by Steven
in the PR, committed to trunk.

If we ignored 8 spaces vs. tabs, perhaps we could define a couple of indentN
m4 macros and use them in front of the various chunks provided from
iforeach*.m4, so that different *.m4 files would have better control over how
much they indent.  But I'm not really sure that even that would lead to
something correct.

2019-01-12  Jakub Jelinek  

PR libfortran/88807
* m4/minloc0.m4: Reindent to avoid -Wmisleading-indentation warnings.
* generated/minloc0_4_i1.c: Regenerated.
* generated/minloc0_4_i2.c: Regenerated.
* generated/minloc0_4_i4.c: Regenerated.
* generated/minloc0_4_i8.c: Regenerated.
* generated/minloc0_4_i16.c: Regenerated.
* generated/minloc0_4_r4.c: Regenerated.
* generated/minloc0_4_r8.c: Regenerated.
* generated/minloc0_4_r10.c: Regenerated.
* generated/minloc0_4_r16.c: Regenerated.
* generated/minloc0_8_i1.c: Regenerated.
* generated/minloc0_8_i2.c: Regenerated.
* generated/minloc0_8_i4.c: Regenerated.
* generated/minloc0_8_i8.c: Regenerated.
* generated/minloc0_8_i16.c: Regenerated.
* generated/minloc0_8_r4.c: Regenerated.
* generated/minloc0_8_r8.c: Regenerated.
* generated/minloc0_8_r10.c: Regenerated.
* generated/minloc0_8_r16.c: Regenerated.
* generated/minloc0_16_i1.c: Regenerated.
* generated/minloc0_16_i2.c: Regenerated.
* generated/minloc0_16_i4.c: Regenerated.
* generated/minloc0_16_i8.c: Regenerated.
* generated/minloc0_16_i16.c: Regenerated.
* generated/minloc0_16_r4.c: Regenerated.
* generated/minloc0_16_r8.c: Regenerated.
* generated/minloc0_16_r10.c: Regenerated.
* generated/minloc0_16_r16.c: Regenerated.

--- libgfortran/m4/minloc0.m4.jj2019-01-01 12:38:37.03848 +0100
+++ libgfortran/m4/minloc0.m4   2019-01-12 01:11:14.779583814 +0100
@@ -63,27 +63,27 @@ FOREACH_FUNCTION(
}
   else
 #endif
-if (back)
-  do
-   {
- if (unlikely (*base <= minval))
-   {
- minval = *base;
- for (n = 0; n < rank; n++)
-   dest[n * dstride] = count[n] + 1;
-   }
- base += sstride[0];
-   }
-  while (++count[0] != extent[0]);
-else
-  do
-{
- if (unlikely (*base < minval))
-   {
- minval = *base;
- for (n = 0; n < rank; n++)
-   dest[n * dstride] = count[n] + 1;
-   }')
+  if (back)
+   do
+ {
+   if (unlikely (*base <= minval))
+ {
+   minval = *base;
+   for (n = 0; n < rank; n++)
+ dest[n * dstride] = count[n] + 1;
+ }
+   base += sstride[0];
+ }
+   while (++count[0] != extent[0]);
+  else
+   do
+ {
+   if (unlikely (*base < minval))
+ {
+   minval = *base;
+   for (n = 0; n < rank; n++)
+ dest[n * dstride] = count[n] + 1;
+ }')
 MASKED_FOREACH_FUNCTION(
 `  atype_name minval;
int fast = 0;
--- libgfortran/generated/minloc0_4_i1.c.jj 2019-01-01 12:38:33.561721355 
+0100
+++ libgfortran/generated/minloc0_4_i1.c2019-01-12 01:11:21.469475297 
+0100
@@ -123,27 +123,27 @@ minloc0_4_i1 (gfc_array_i4 * const restr
}
   else
 #endif
-if (back)
-  do
-   {
- if (unlikely (*base <= minval))
-   {
- minval = *base;
- for (n = 0; n < rank; n++)
-   dest[n * dstride] = count[n] + 1;
-   }
- base += sstride[0];
-   }
-  while (++count[0] != extent[0]);
-else
-  do
-{
- if (unlikely (*base < minval))
-   {
- minval = *base;
- for (n = 0; n < rank; n++)
-   dest[n * dstride] = count[n] + 1;
-   }
+  if (back)
+   do
+ {
+   if (unlikely (*base <= minval))
+ {
+   minval = *base;
+   for (n = 0; n < rank; n++)
+ dest[n * dstride] = count[n] + 1;
+ }
+   base += sstride[0];
+ }
+   while (++count[0] != extent[0]);
+  else
+   do
+ {
+   if (unlikely (*base < minval))
+ {
+   minval =