Hi,

At the moment, the test case contained in this patch hangs.

For the test-case we generate:
...
  @ %r79 bra $L18;
  {
    call _gfortran_abort;
    trap;
    exit;
  }
 $L18:
...

which results in SASS code (at GOMP_NVPTX_JIT=-O4):
...
        /*05d8*/               @P0 BRA `(.L_18);
        /*05e8*/                   JCAL `(_gfortran_abort);
        /*05f0*/                   BPT.TRAP 0x1;
        /*05f8*/                   EXIT;
.L_18:
...
There's no convergence point generated for the diverging branch, so we may end up executing random code after .L_18 (a problem I long suspected could happen, but never observed until now).

The patch adds an exit on the other path, making sure that all threads in the warp reach exit, and indeed fixing the hang:
...
  @ %r79 bra $L18;
  {
    call _gfortran_abort;
    trap;
    exit;
  }
 $L18:
 exit;
...

Built and reg-tested on x86_64 with nvptx accelerator.

I'll commit this shortly for stage4. Strictly speaking, this is not a GCC 8 regression, but a wrong-code bug. However, I think that the code generation error is fundamental enough, and the fix simple and localized enough, that it is permissible in stage4.

Thanks,
- Tom
[nvptx, PR81352] Add exit insn after noreturn call for neutered threads in warp

2018-01-23  Tom de Vries  <t...@codesourcery.com>

	PR target/81352
	* config/nvptx/nvptx.c (nvptx_single): Add exit insn after noreturn call
	for neutered threads in warp.
	* config/nvptx/nvptx.md (define_insn "exit"): New insn.

	* testsuite/libgomp.oacc-fortran/pr81352.f90: New test.

---
 gcc/config/nvptx/nvptx.c                           |  7 ++++++-
 gcc/config/nvptx/nvptx.md                          |  5 +++++
 libgomp/testsuite/libgomp.oacc-fortran/pr81352.f90 | 20 ++++++++++++++++++++
 3 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index f5bb438..3516740 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -4062,7 +4062,12 @@ nvptx_single (unsigned mask, basic_block from, basic_block to)
 	if (tail_branch)
 	  before = emit_label_before (label, before);
 	else
-	  emit_label_after (label, tail);
+	  {
+	    rtx_insn *label_insn = emit_label_after (label, tail);
+	    if (mode == GOMP_DIM_VECTOR && CALL_P (tail)
+		&& find_reg_note (tail, REG_NORETURN, NULL))
+	      emit_insn_after (gen_exit (), label_insn);
+	  }
       }
 
   /* Now deal with propagating the branch condition.  */
diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md
index f9c087b..135479b 100644
--- a/gcc/config/nvptx/nvptx.md
+++ b/gcc/config/nvptx/nvptx.md
@@ -994,6 +994,11 @@
   ""
   "")
 
+(define_insn "exit"
+  [(const_int 1)]
+  ""
+  "exit;")
+
 (define_insn "return"
   [(return)]
   ""
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/pr81352.f90 b/libgomp/testsuite/libgomp.oacc-fortran/pr81352.f90
new file mode 100644
index 0000000..f6969c8
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/pr81352.f90
@@ -0,0 +1,20 @@
+! { dg-do run }
+
+program foo
+  integer :: a(3,3), l, ll
+  a = 0
+
+  !$acc parallel num_gangs (1) num_workers(1)
+
+  do l=1,3
+     !$acc loop vector
+     do ll=1,3
+        a(l,ll) = 2
+     enddo
+  enddo
+
+  if (any(a(1:3,1:3).ne.2)) call abort
+
+  !$acc end parallel
+
+end program foo

Reply via email to