[PATCH] D66673: [OPENMP][NVPTX]Fix critical region codegen.

2019-08-26 Thread Alexey Bataev via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL369946: [OPENMP][NVPTX]Fix critical region codegen. 
(authored by ABataev, committed by ).
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D66673/new/

https://reviews.llvm.org/D66673

Files:
  cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
  cfe/trunk/test/OpenMP/nvptx_parallel_codegen.cpp


Index: cfe/trunk/test/OpenMP/nvptx_parallel_codegen.cpp
===
--- cfe/trunk/test/OpenMP/nvptx_parallel_codegen.cpp
+++ cfe/trunk/test/OpenMP/nvptx_parallel_codegen.cpp
@@ -343,6 +343,7 @@
 
 // CHECK-LABEL: define internal void @{{.+}}(i32* noalias %{{.+}}, i32* 
noalias %{{.+}}, i32* dereferenceable{{.*}})
 // CHECK:  [[CC:%.+]] = alloca i32,
+// CHECK:  [[MASK:%.+]] = call i32 @__kmpc_warp_active_thread_mask()
 // CHECK:  [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
 // CHECK:  [[NUM_THREADS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
 // CHECK:  store i32 0, i32* [[CC]],
@@ -362,7 +363,7 @@
 // CHECK:  store i32
 // CHECK:  call void @__kmpc_end_critical(
 
-// CHECK:  call void @__kmpc_barrier(%struct.ident_t* @{{.+}}, i32 %{{.+}})
+// CHECK:  call void @__kmpc_syncwarp(i32 [[MASK]])
 // CHECK:  [[NEW_CC_VAL:%.+]] = add nsw i32 [[CC_VAL]], 1
 // CHECK:  store i32 [[NEW_CC_VAL]], i32* [[CC]],
 // CHECK:  br label
Index: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
===
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -107,6 +107,10 @@
   /// Call to void __kmpc_barrier_simple_spmd(ident_t *loc, kmp_int32
   /// global_tid);
   OMPRTL__kmpc_barrier_simple_spmd,
+  /// Call to int32_t __kmpc_warp_active_thread_mask(void);
+  OMPRTL_NVPTX__kmpc_warp_active_thread_mask,
+  /// Call to void __kmpc_syncwarp(int32_t Mask);
+  OMPRTL_NVPTX__kmpc_syncwarp,
 };
 
 /// Pre(post)-action for different OpenMP constructs specialized for NVPTX.
@@ -1794,6 +1798,20 @@
 ->addFnAttr(llvm::Attribute::Convergent);
 break;
   }
+  case OMPRTL_NVPTX__kmpc_warp_active_thread_mask: {
+// Build int32_t __kmpc_warp_active_thread_mask(void);
+auto *FnTy =
+llvm::FunctionType::get(CGM.Int32Ty, llvm::None, /*isVarArg=*/false);
+RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_warp_active_thread_mask");
+break;
+  }
+  case OMPRTL_NVPTX__kmpc_syncwarp: {
+// Build void __kmpc_syncwarp(kmp_int32 Mask);
+auto *FnTy =
+llvm::FunctionType::get(CGM.VoidTy, CGM.Int32Ty, /*isVarArg=*/false);
+RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_syncwarp");
+break;
+  }
   }
   return RTLFn;
 }
@@ -2700,6 +2718,9 @@
   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.critical.body");
   llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.critical.exit");
 
+  // Get the mask of active threads in the warp.
+  llvm::Value *Mask = CGF.EmitRuntimeCall(
+  createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_warp_active_thread_mask));
   // Fetch team-local id of the thread.
   llvm::Value *ThreadID = getNVPTXThreadID(CGF);
 
@@ -2740,8 +2761,9 @@
   // Block waits for all threads in current team to finish then increments the
   // counter variable and returns to the loop.
   CGF.EmitBlock(SyncBB);
-  emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false,
-  /*ForceSimpleCall=*/true);
+  // Reconverge active threads in the warp.
+  (void)CGF.EmitRuntimeCall(
+  createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_syncwarp), Mask);
 
   llvm::Value *IncCounterVal =
   CGF.Builder.CreateNSWAdd(CounterVal, CGF.Builder.getInt32(1));


Index: cfe/trunk/test/OpenMP/nvptx_parallel_codegen.cpp
===
--- cfe/trunk/test/OpenMP/nvptx_parallel_codegen.cpp
+++ cfe/trunk/test/OpenMP/nvptx_parallel_codegen.cpp
@@ -343,6 +343,7 @@
 
 // CHECK-LABEL: define internal void @{{.+}}(i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* dereferenceable{{.*}})
 // CHECK:  [[CC:%.+]] = alloca i32,
+// CHECK:  [[MASK:%.+]] = call i32 @__kmpc_warp_active_thread_mask()
 // CHECK:  [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
 // CHECK:  [[NUM_THREADS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
 // CHECK:  store i32 0, i32* [[CC]],
@@ -362,7 +363,7 @@
 // CHECK:  store i32
 // CHECK:  call void @__kmpc_end_critical(
 
-// CHECK:  call void @__kmpc_barrier(%struct.ident_t* @{{.+}}, i32 %{{.+}})
+// CHECK:  call void @__kmpc_syncwarp(i32 [[MASK]])
 // CHECK:  [[NEW_CC_VAL:%.+]] = add nsw i32 [[CC_VAL]], 1
 // CHECK:  store i32 [[NEW_CC_VAL]], i32* [[CC]],
 // CHECK:  br label
Index: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
===
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp

[PATCH] D66673: [OPENMP][NVPTX]Fix critical region codegen.

2019-08-26 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert accepted this revision.
jdoerfert added a comment.
This revision is now accepted and ready to land.

LGTM


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D66673/new/

https://reviews.llvm.org/D66673



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D66673: [OPENMP][NVPTX]Fix critical region codegen.

2019-08-23 Thread Alexey Bataev via Phabricator via cfe-commits
ABataev updated this revision to Diff 216934.
ABataev added a comment.

Fix the test


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D66673/new/

https://reviews.llvm.org/D66673

Files:
  lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
  test/OpenMP/nvptx_parallel_codegen.cpp


Index: test/OpenMP/nvptx_parallel_codegen.cpp
===
--- test/OpenMP/nvptx_parallel_codegen.cpp
+++ test/OpenMP/nvptx_parallel_codegen.cpp
@@ -343,6 +343,7 @@
 
 // CHECK-LABEL: define internal void @{{.+}}(i32* noalias %{{.+}}, i32* 
noalias %{{.+}}, i32* dereferenceable{{.*}})
 // CHECK:  [[CC:%.+]] = alloca i32,
+// CHECK:  [[MASK:%.+]] = call i32 @__kmpc_warp_active_thread_mask()
 // CHECK:  [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
 // CHECK:  [[NUM_THREADS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
 // CHECK:  store i32 0, i32* [[CC]],
@@ -362,7 +363,7 @@
 // CHECK:  store i32
 // CHECK:  call void @__kmpc_end_critical(
 
-// CHECK:  call void @__kmpc_barrier(%struct.ident_t* @{{.+}}, i32 %{{.+}})
+// CHECK:  call void @__kmpc_syncwarp(i32 [[MASK]])
 // CHECK:  [[NEW_CC_VAL:%.+]] = add nsw i32 [[CC_VAL]], 1
 // CHECK:  store i32 [[NEW_CC_VAL]], i32* [[CC]],
 // CHECK:  br label
Index: lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
===
--- lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -107,6 +107,10 @@
   /// Call to void __kmpc_barrier_simple_spmd(ident_t *loc, kmp_int32
   /// global_tid);
   OMPRTL__kmpc_barrier_simple_spmd,
+  /// Call to int32_t __kmpc_warp_active_thread_mask(void);
+  OMPRTL_NVPTX__kmpc_warp_active_thread_mask,
+  /// Call to void __kmpc_syncwarp(int32_t Mask);
+  OMPRTL_NVPTX__kmpc_syncwarp,
 };
 
 /// Pre(post)-action for different OpenMP constructs specialized for NVPTX.
@@ -1794,6 +1798,20 @@
 ->addFnAttr(llvm::Attribute::Convergent);
 break;
   }
+  case OMPRTL_NVPTX__kmpc_warp_active_thread_mask: {
+// Build int32_t __kmpc_warp_active_thread_mask(void);
+auto *FnTy =
+llvm::FunctionType::get(CGM.Int32Ty, llvm::None, /*isVarArg=*/false);
+RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_warp_active_thread_mask");
+break;
+  }
+  case OMPRTL_NVPTX__kmpc_syncwarp: {
+// Build void __kmpc_syncwarp(kmp_int32 Mask);
+auto *FnTy =
+llvm::FunctionType::get(CGM.VoidTy, CGM.Int32Ty, /*isVarArg=*/false);
+RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_syncwarp");
+break;
+  }
   }
   return RTLFn;
 }
@@ -2700,6 +2718,9 @@
   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.critical.body");
   llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.critical.exit");
 
+  // Get the mask of active threads in the warp.
+  llvm::Value *Mask = CGF.EmitRuntimeCall(
+  createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_warp_active_thread_mask));
   // Fetch team-local id of the thread.
   llvm::Value *ThreadID = getNVPTXThreadID(CGF);
 
@@ -2740,8 +2761,9 @@
   // Block waits for all threads in current team to finish then increments the
   // counter variable and returns to the loop.
   CGF.EmitBlock(SyncBB);
-  emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false,
-  /*ForceSimpleCall=*/true);
+  // Reconverge active threads in the warp.
+  (void)CGF.EmitRuntimeCall(
+  createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_syncwarp), Mask);
 
   llvm::Value *IncCounterVal =
   CGF.Builder.CreateNSWAdd(CounterVal, CGF.Builder.getInt32(1));


Index: test/OpenMP/nvptx_parallel_codegen.cpp
===
--- test/OpenMP/nvptx_parallel_codegen.cpp
+++ test/OpenMP/nvptx_parallel_codegen.cpp
@@ -343,6 +343,7 @@
 
 // CHECK-LABEL: define internal void @{{.+}}(i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* dereferenceable{{.*}})
 // CHECK:  [[CC:%.+]] = alloca i32,
+// CHECK:  [[MASK:%.+]] = call i32 @__kmpc_warp_active_thread_mask()
 // CHECK:  [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
 // CHECK:  [[NUM_THREADS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
 // CHECK:  store i32 0, i32* [[CC]],
@@ -362,7 +363,7 @@
 // CHECK:  store i32
 // CHECK:  call void @__kmpc_end_critical(
 
-// CHECK:  call void @__kmpc_barrier(%struct.ident_t* @{{.+}}, i32 %{{.+}})
+// CHECK:  call void @__kmpc_syncwarp(i32 [[MASK]])
 // CHECK:  [[NEW_CC_VAL:%.+]] = add nsw i32 [[CC_VAL]], 1
 // CHECK:  store i32 [[NEW_CC_VAL]], i32* [[CC]],
 // CHECK:  br label
Index: lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
===
--- lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -107,6 +107,10 @@
   /// Call to void __kmpc_barrier_simple_spmd(ident_t *loc, kmp_int32
   /// global_tid);
   OMPRTL__kmpc_barrier_simple_spmd,
+  /// Call to int32_t __kmpc_warp_active_thread_mask(void);
+  OMPRTL_NVPTX__kmpc_warp_active_thread_mask,
+  

[PATCH] D66673: [OPENMP][NVPTX]Fix critical region codegen.

2019-08-23 Thread Alexey Bataev via Phabricator via cfe-commits
ABataev added a comment.

In D66673#1643544 , @jdoerfert wrote:

> I guess IR test should be affected already and it would be good to have the 
> run time test that breaks with barriers.


Runtime test is 
libomptarget/deviceRTLs/nvptx/test/parallel/spmd_parallel_regions.cpp


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D66673/new/

https://reviews.llvm.org/D66673



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D66673: [OPENMP][NVPTX]Fix critical region codegen.

2019-08-23 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert added a comment.

I guess IR test should be affected already and it would be good to have the run 
time test that breaks with barriers.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D66673/new/

https://reviews.llvm.org/D66673



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D66673: [OPENMP][NVPTX]Fix critical region codegen.

2019-08-23 Thread Alexey Bataev via Phabricator via cfe-commits
ABataev created this revision.
ABataev added a reviewer: ABataev.
Herald added subscribers: cfe-commits, guansong, jholewinski.
Herald added a reviewer: jdoerfert.
Herald added a project: clang.

Previously critical regions were emitted with the barrier making it a
worksharing construct though it is not. Also, it leads to incorrect
behavior in Cuda9+. Patch fixes this problem.


Repository:
  rC Clang

https://reviews.llvm.org/D66673

Files:
  lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp


Index: lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
===
--- lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -107,6 +107,10 @@
   /// Call to void __kmpc_barrier_simple_spmd(ident_t *loc, kmp_int32
   /// global_tid);
   OMPRTL__kmpc_barrier_simple_spmd,
+  /// Call to int32_t __kmpc_warp_active_thread_mask(void);
+  OMPRTL_NVPTX__kmpc_warp_active_thread_mask,
+  /// Call to void __kmpc_syncwarp(int32_t Mask);
+  OMPRTL_NVPTX__kmpc_syncwarp,
 };
 
 /// Pre(post)-action for different OpenMP constructs specialized for NVPTX.
@@ -1794,6 +1798,20 @@
 ->addFnAttr(llvm::Attribute::Convergent);
 break;
   }
+  case OMPRTL_NVPTX__kmpc_warp_active_thread_mask: {
+// Build int32_t __kmpc_warp_active_thread_mask(void);
+auto *FnTy =
+llvm::FunctionType::get(CGM.Int32Ty, llvm::None, /*isVarArg=*/false);
+RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_warp_active_thread_mask");
+break;
+  }
+  case OMPRTL_NVPTX__kmpc_syncwarp: {
+// Build void __kmpc_syncwarp(kmp_int32 Mask);
+auto *FnTy =
+llvm::FunctionType::get(CGM.VoidTy, CGM.Int32Ty, /*isVarArg=*/false);
+RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_syncwarp");
+break;
+  }
   }
   return RTLFn;
 }
@@ -2700,6 +2718,9 @@
   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.critical.body");
   llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.critical.exit");
 
+  // Get the mask of active threads in the warp.
+  llvm::Value *Mask = CGF.EmitRuntimeCall(
+  createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_warp_active_thread_mask));
   // Fetch team-local id of the thread.
   llvm::Value *ThreadID = getNVPTXThreadID(CGF);
 
@@ -2740,8 +2761,9 @@
   // Block waits for all threads in current team to finish then increments the
   // counter variable and returns to the loop.
   CGF.EmitBlock(SyncBB);
-  emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false,
-  /*ForceSimpleCall=*/true);
+  // Reconverge active threads in the warp.
+  (void)CGF.EmitRuntimeCall(
+  createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_syncwarp), Mask);
 
   llvm::Value *IncCounterVal =
   CGF.Builder.CreateNSWAdd(CounterVal, CGF.Builder.getInt32(1));


Index: lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
===
--- lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -107,6 +107,10 @@
   /// Call to void __kmpc_barrier_simple_spmd(ident_t *loc, kmp_int32
   /// global_tid);
   OMPRTL__kmpc_barrier_simple_spmd,
+  /// Call to int32_t __kmpc_warp_active_thread_mask(void);
+  OMPRTL_NVPTX__kmpc_warp_active_thread_mask,
+  /// Call to void __kmpc_syncwarp(int32_t Mask);
+  OMPRTL_NVPTX__kmpc_syncwarp,
 };
 
 /// Pre(post)-action for different OpenMP constructs specialized for NVPTX.
@@ -1794,6 +1798,20 @@
 ->addFnAttr(llvm::Attribute::Convergent);
 break;
   }
+  case OMPRTL_NVPTX__kmpc_warp_active_thread_mask: {
+// Build int32_t __kmpc_warp_active_thread_mask(void);
+auto *FnTy =
+llvm::FunctionType::get(CGM.Int32Ty, llvm::None, /*isVarArg=*/false);
+RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_warp_active_thread_mask");
+break;
+  }
+  case OMPRTL_NVPTX__kmpc_syncwarp: {
+// Build void __kmpc_syncwarp(kmp_int32 Mask);
+auto *FnTy =
+llvm::FunctionType::get(CGM.VoidTy, CGM.Int32Ty, /*isVarArg=*/false);
+RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_syncwarp");
+break;
+  }
   }
   return RTLFn;
 }
@@ -2700,6 +2718,9 @@
   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.critical.body");
   llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.critical.exit");
 
+  // Get the mask of active threads in the warp.
+  llvm::Value *Mask = CGF.EmitRuntimeCall(
+  createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_warp_active_thread_mask));
   // Fetch team-local id of the thread.
   llvm::Value *ThreadID = getNVPTXThreadID(CGF);
 
@@ -2740,8 +2761,9 @@
   // Block waits for all threads in current team to finish then increments the
   // counter variable and returns to the loop.
   CGF.EmitBlock(SyncBB);
-  emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false,
-  /*ForceSimpleCall=*/true);
+  // Reconverge active threads in the warp.
+  (void)CGF.EmitRuntimeCall(
+  createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_syncwarp), Mask);
 
   llvm::Value *IncCounterVal =