[PATCH] D113421: [clang][openmp][NFC] Remove arch-specific CGOpenMPRuntimeGPU files

2021-11-08 Thread Jon Chesterfield via Phabricator via cfe-commits
JonChesterfield accepted this revision.
JonChesterfield added a comment.

Thanks! Delighted to see the subclasses gone


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D113421/new/

https://reviews.llvm.org/D113421

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D113421: [clang][openmp][NFC] Remove arch-specific CGOpenMPRuntimeGPU files

2021-11-08 Thread Atmn Patel via Phabricator via cfe-commits
atmnpatel updated this revision to Diff 385627.
atmnpatel added a comment.

small fixes


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D113421/new/

https://reviews.llvm.org/D113421

Files:
  clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp
  clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.h
  clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
  clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
  clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
  clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
  clang/lib/CodeGen/CMakeLists.txt
  clang/lib/CodeGen/CodeGenModule.cpp
  llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
  openmp/libomptarget/DeviceRTL/src/Mapping.cpp
  openmp/libomptarget/DeviceRTL/src/Utils.cpp
  openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
  openmp/libomptarget/deviceRTLs/common/include/target/shuffle.h
  openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu

Index: openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu
===
--- openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu
+++ openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu
@@ -102,10 +102,12 @@
 EXTERN int __kmpc_get_hardware_num_threads_in_block() {
   return __nvvm_read_ptx_sreg_ntid_x();
 }
+EXTERN unsigned __kmpc_get_warp_size() {
+  return WARPSIZE;
+}
 EXTERN unsigned GetWarpId() {
   return __kmpc_get_hardware_thread_id_in_block() / WARPSIZE;
 }
-EXTERN unsigned GetWarpSize() { return WARPSIZE; }
 EXTERN unsigned GetLaneId() {
   return __kmpc_get_hardware_thread_id_in_block() & (WARPSIZE - 1);
 }
Index: openmp/libomptarget/deviceRTLs/common/include/target/shuffle.h
===
--- openmp/libomptarget/deviceRTLs/common/include/target/shuffle.h
+++ openmp/libomptarget/deviceRTLs/common/include/target/shuffle.h
@@ -35,7 +35,7 @@
 ///{
 extern "C" {
 unsigned GetLaneId();
-unsigned GetWarpSize();
+unsigned __kmpc_get_warp_size();
 void __kmpc_impl_unpack(uint64_t val, uint32_t , uint32_t );
 uint64_t __kmpc_impl_pack(uint32_t lo, uint32_t hi);
 }
@@ -60,7 +60,7 @@
 
 inline int32_t __kmpc_impl_shfl_sync(uint64_t Mask, int32_t Var,
  int32_t SrcLane) {
-  int Width = GetWarpSize();
+  int Width = __kmpc_get_warp_size();
   int Self = GetLaneId();
   int Index = SrcLane + (Self & ~(Width - 1));
   return __builtin_amdgcn_ds_bpermute(Index << 2, Var);
@@ -90,7 +90,7 @@
 
 inline int32_t __kmpc_impl_shfl_down_sync(uint64_t Mask, int32_t Var,
   uint32_t Delta, int32_t Width) {
-  int32_t T = ((GetWarpSize() - Width) << 8) | 0x1f;
+  int32_t T = ((__kmpc_get_warp_size() - Width) << 8) | 0x1f;
   return __nvvm_shfl_sync_down_i32(Mask, Var, Delta, T);
 }
 
Index: openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
===
--- openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
+++ openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
@@ -133,8 +133,11 @@
__builtin_amdgcn_workgroup_size_x());
 }
 
+EXTERN unsigned __kmpc_get_warp_size() {
+  return WARPSIZE;
+}
+
 EXTERN unsigned GetWarpId() { return __kmpc_get_hardware_thread_id_in_block() / WARPSIZE; }
-EXTERN unsigned GetWarpSize() { return WARPSIZE; }
 EXTERN unsigned GetLaneId() {
   return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
 }
Index: openmp/libomptarget/DeviceRTL/src/Utils.cpp
===
--- openmp/libomptarget/DeviceRTL/src/Utils.cpp
+++ openmp/libomptarget/DeviceRTL/src/Utils.cpp
@@ -24,6 +24,7 @@
 __attribute__((used, weak, optnone)) void keepAlive() {
   __kmpc_get_hardware_thread_id_in_block();
   __kmpc_get_hardware_num_threads_in_block();
+  __kmpc_get_warp_size();
   __kmpc_barrier_simple_spmd(nullptr, 0);
   __kmpc_barrier_simple_generic(nullptr, 0);
 }
Index: openmp/libomptarget/DeviceRTL/src/Mapping.cpp
===
--- openmp/libomptarget/DeviceRTL/src/Mapping.cpp
+++ openmp/libomptarget/DeviceRTL/src/Mapping.cpp
@@ -277,5 +277,10 @@
   FunctionTracingRAII();
   return impl::getNumHardwareThreadsInBlock();
 }
+
+__attribute__((noinline)) uint32_t __kmpc_get_warp_size() {
+  FunctionTracingRAII();
+  return impl::getWarpSize();
+}
 }
 #pragma omp end declare target
Index: llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
===
--- llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -455,6 +455,8 @@
 __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,)
 __OMP_RTL(__kmpc_syncwarp, false, Void, Int64)
 
+__OMP_RTL(__kmpc_get_warp_size, false, Int32, )
+
 __OMP_RTL(__kmpc_is_generic_main_thread_id, false, Int8, Int32)
 
 __OMP_RTL(__last, false, Void, )
Index: clang/lib/CodeGen/CodeGenModule.cpp

[PATCH] D113421: [clang][openmp][NFC] Remove arch-specific CGOpenMPRuntimeGPU files

2021-11-08 Thread Atmn Patel via Phabricator via cfe-commits
atmnpatel updated this revision to Diff 385626.
atmnpatel added a comment.

Yep yep.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D113421/new/

https://reviews.llvm.org/D113421

Files:
  clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp
  clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.h
  clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
  clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
  clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
  clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
  clang/lib/CodeGen/CMakeLists.txt
  clang/lib/CodeGen/CodeGenModule.cpp
  llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
  openmp/libomptarget/DeviceRTL/src/Mapping.cpp
  openmp/libomptarget/DeviceRTL/src/Utils.cpp
  openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
  openmp/libomptarget/deviceRTLs/common/include/target/shuffle.h
  openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu

Index: openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu
===
--- openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu
+++ openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu
@@ -105,11 +105,14 @@
 EXTERN unsigned GetWarpId() {
   return __kmpc_get_hardware_thread_id_in_block() / WARPSIZE;
 }
-EXTERN unsigned GetWarpSize() { return WARPSIZE; }
 EXTERN unsigned GetLaneId() {
   return __kmpc_get_hardware_thread_id_in_block() & (WARPSIZE - 1);
 }
 
+unsigned __kmpc_get_warp_size() {
+  return WARPSIZE;
+}
+
 // Atomics
 uint32_t __kmpc_atomic_add(uint32_t *Address, uint32_t Val) {
   return __atomic_fetch_add(Address, Val, __ATOMIC_SEQ_CST);
Index: openmp/libomptarget/deviceRTLs/common/include/target/shuffle.h
===
--- openmp/libomptarget/deviceRTLs/common/include/target/shuffle.h
+++ openmp/libomptarget/deviceRTLs/common/include/target/shuffle.h
@@ -35,7 +35,7 @@
 ///{
 extern "C" {
 unsigned GetLaneId();
-unsigned GetWarpSize();
+unsigned __kmpc_get_warp_size();
 void __kmpc_impl_unpack(uint64_t val, uint32_t , uint32_t );
 uint64_t __kmpc_impl_pack(uint32_t lo, uint32_t hi);
 }
@@ -60,7 +60,7 @@
 
 inline int32_t __kmpc_impl_shfl_sync(uint64_t Mask, int32_t Var,
  int32_t SrcLane) {
-  int Width = GetWarpSize();
+  int Width = __kmpc_get_warp_size();
   int Self = GetLaneId();
   int Index = SrcLane + (Self & ~(Width - 1));
   return __builtin_amdgcn_ds_bpermute(Index << 2, Var);
@@ -90,7 +90,7 @@
 
 inline int32_t __kmpc_impl_shfl_down_sync(uint64_t Mask, int32_t Var,
   uint32_t Delta, int32_t Width) {
-  int32_t T = ((GetWarpSize() - Width) << 8) | 0x1f;
+  int32_t T = ((__kmpc_get_warp_size() - Width) << 8) | 0x1f;
   return __nvvm_shfl_sync_down_i32(Mask, Var, Delta, T);
 }
 
Index: openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
===
--- openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
+++ openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
@@ -133,8 +133,11 @@
__builtin_amdgcn_workgroup_size_x());
 }
 
+uint32_t __kmpc_get_warp_size() {
+  return WARPSIZE;
+}
+
 EXTERN unsigned GetWarpId() { return __kmpc_get_hardware_thread_id_in_block() / WARPSIZE; }
-EXTERN unsigned GetWarpSize() { return WARPSIZE; }
 EXTERN unsigned GetLaneId() {
   return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
 }
Index: openmp/libomptarget/DeviceRTL/src/Utils.cpp
===
--- openmp/libomptarget/DeviceRTL/src/Utils.cpp
+++ openmp/libomptarget/DeviceRTL/src/Utils.cpp
@@ -24,6 +24,7 @@
 __attribute__((used, weak, optnone)) void keepAlive() {
   __kmpc_get_hardware_thread_id_in_block();
   __kmpc_get_hardware_num_threads_in_block();
+  __kmpc_get_warp_size();
   __kmpc_barrier_simple_spmd(nullptr, 0);
   __kmpc_barrier_simple_generic(nullptr, 0);
 }
Index: openmp/libomptarget/DeviceRTL/src/Mapping.cpp
===
--- openmp/libomptarget/DeviceRTL/src/Mapping.cpp
+++ openmp/libomptarget/DeviceRTL/src/Mapping.cpp
@@ -277,5 +277,10 @@
   FunctionTracingRAII();
   return impl::getNumHardwareThreadsInBlock();
 }
+
+__attribute__((noinline)) uint32_t __kmpc_get_warp_size() {
+  FunctionTracingRAII();
+  return impl::getWarpSize();
+}
 }
 #pragma omp end declare target
Index: llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
===
--- llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -455,6 +455,8 @@
 __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,)
 __OMP_RTL(__kmpc_syncwarp, false, Void, Int64)
 
+__OMP_RTL(__kmpc_get_warp_size, false, Int32, )
+
 __OMP_RTL(__kmpc_is_generic_main_thread_id, false, Int8, Int32)
 
 __OMP_RTL(__last, false, Void, )
Index: 

[PATCH] D113421: [clang][openmp][NFC] Remove arch-specific CGOpenMPRuntimeGPU files

2021-11-08 Thread Jon Chesterfield via Phabricator via cfe-commits
JonChesterfield added a comment.

I think __kmpc_get_warp_size needs an entry in Utils.cpp keepAlive(), and 
corresponding functions added to `deviceRTLs/*/src/target_impl.*`


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D113421/new/

https://reviews.llvm.org/D113421

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D113421: [clang][openmp][NFC] Remove arch-specific CGOpenMPRuntimeGPU files

2021-11-08 Thread Shilei Tian via Phabricator via cfe-commits
tianshilei1992 accepted this revision.
tianshilei1992 added a comment.

LG


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D113421/new/

https://reviews.llvm.org/D113421

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D113421: [clang][openmp][NFC] Remove arch-specific CGOpenMPRuntimeGPU files

2021-11-08 Thread Atmn Patel via Phabricator via cfe-commits
atmnpatel updated this revision to Diff 385615.
atmnpatel added a comment.

Remove intrinsic includes


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D113421/new/

https://reviews.llvm.org/D113421

Files:
  clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp
  clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.h
  clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
  clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
  clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
  clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
  clang/lib/CodeGen/CMakeLists.txt
  clang/lib/CodeGen/CodeGenModule.cpp
  llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
  openmp/libomptarget/DeviceRTL/src/Mapping.cpp

Index: openmp/libomptarget/DeviceRTL/src/Mapping.cpp
===
--- openmp/libomptarget/DeviceRTL/src/Mapping.cpp
+++ openmp/libomptarget/DeviceRTL/src/Mapping.cpp
@@ -277,5 +277,10 @@
   FunctionTracingRAII();
   return impl::getNumHardwareThreadsInBlock();
 }
+
+__attribute__((noinline)) uint32_t __kmpc_get_warp_size() {
+  FunctionTracingRAII();
+  return impl::getWarpSize();
+}
 }
 #pragma omp end declare target
Index: llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
===
--- llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -455,6 +455,8 @@
 __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,)
 __OMP_RTL(__kmpc_syncwarp, false, Void, Int64)
 
+__OMP_RTL(__kmpc_get_warp_size, false, Int32, )
+
 __OMP_RTL(__kmpc_is_generic_main_thread_id, false, Int8, Int32)
 
 __OMP_RTL(__last, false, Void, )
Index: clang/lib/CodeGen/CodeGenModule.cpp
===
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -19,8 +19,7 @@
 #include "CGObjCRuntime.h"
 #include "CGOpenCLRuntime.h"
 #include "CGOpenMPRuntime.h"
-#include "CGOpenMPRuntimeAMDGCN.h"
-#include "CGOpenMPRuntimeNVPTX.h"
+#include "CGOpenMPRuntimeGPU.h"
 #include "CodeGenFunction.h"
 #include "CodeGenPGO.h"
 #include "ConstantEmitter.h"
@@ -244,14 +243,10 @@
   switch (getTriple().getArch()) {
   case llvm::Triple::nvptx:
   case llvm::Triple::nvptx64:
-assert(getLangOpts().OpenMPIsDevice &&
-   "OpenMP NVPTX is only prepared to deal with device code.");
-OpenMPRuntime.reset(new CGOpenMPRuntimeNVPTX(*this));
-break;
   case llvm::Triple::amdgcn:
 assert(getLangOpts().OpenMPIsDevice &&
-   "OpenMP AMDGCN is only prepared to deal with device code.");
-OpenMPRuntime.reset(new CGOpenMPRuntimeAMDGCN(*this));
+   "OpenMP AMDGPU/NVPTX is only prepared to deal with device code.");
+OpenMPRuntime.reset(new CGOpenMPRuntimeGPU(*this));
 break;
   default:
 if (LangOpts.OpenMPSimd)
Index: clang/lib/CodeGen/CMakeLists.txt
===
--- clang/lib/CodeGen/CMakeLists.txt
+++ clang/lib/CodeGen/CMakeLists.txt
@@ -59,9 +59,7 @@
   CGObjCRuntime.cpp
   CGOpenCLRuntime.cpp
   CGOpenMPRuntime.cpp
-  CGOpenMPRuntimeAMDGCN.cpp
   CGOpenMPRuntimeGPU.cpp
-  CGOpenMPRuntimeNVPTX.cpp
   CGRecordLayoutBuilder.cpp
   CGStmt.cpp
   CGStmtOpenMP.cpp
Index: clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
===
--- clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
+++ /dev/null
@@ -1,40 +0,0 @@
-//===- CGOpenMPRuntimeNVPTX.h - Interface to OpenMP NVPTX Runtimes ===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===--===//
-//
-// This provides a class for OpenMP runtime code generation specialized to NVPTX
-// targets from generalized CGOpenMPRuntimeGPU class.
-//
-//===--===//
-
-#ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H
-#define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H
-
-#include "CGOpenMPRuntime.h"
-#include "CGOpenMPRuntimeGPU.h"
-#include "CodeGenFunction.h"
-#include "clang/AST/StmtOpenMP.h"
-
-namespace clang {
-namespace CodeGen {
-
-class CGOpenMPRuntimeNVPTX final : public CGOpenMPRuntimeGPU {
-
-public:
-  explicit CGOpenMPRuntimeNVPTX(CodeGenModule );
-
-  /// Get the GPU warp size.
-  llvm::Value *getGPUWarpSize(CodeGenFunction ) override;
-
-  /// Get the id of the current thread on the GPU.
-  llvm::Value *getGPUThreadID(CodeGenFunction ) override;
-};
-
-} // CodeGen namespace.
-} // clang namespace.
-
-#endif // LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H
Index: clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
===
--- clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ /dev/null
@@ -1,48 +0,0 @@
-//=== 

[PATCH] D113421: [clang][openmp][NFC] Remove arch-specific CGOpenMPRuntimeGPU files

2021-11-08 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert accepted this revision.
jdoerfert added a comment.
This revision is now accepted and ready to land.

LG




Comment at: clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp:23
 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
 #include "llvm/IR/IntrinsicsNVPTX.h"

needed? also the intrinsic include below. They should not be (maybe through a 
follow up commit) as we should get rid of all intrinsic uses here.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D113421/new/

https://reviews.llvm.org/D113421

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D113421: [clang][openmp][NFC] Remove arch-specific CGOpenMPRuntimeGPU files

2021-11-08 Thread Atmn Patel via Phabricator via cfe-commits
atmnpatel updated this revision to Diff 385611.
atmnpatel added a comment.
Herald added projects: OpenMP, LLVM.
Herald added subscribers: llvm-commits, openmp-commits.

Updates.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D113421/new/

https://reviews.llvm.org/D113421

Files:
  clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp
  clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.h
  clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
  clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
  clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
  clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
  clang/lib/CodeGen/CMakeLists.txt
  clang/lib/CodeGen/CodeGenModule.cpp
  llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
  openmp/libomptarget/DeviceRTL/src/Mapping.cpp

Index: openmp/libomptarget/DeviceRTL/src/Mapping.cpp
===
--- openmp/libomptarget/DeviceRTL/src/Mapping.cpp
+++ openmp/libomptarget/DeviceRTL/src/Mapping.cpp
@@ -277,5 +277,10 @@
   FunctionTracingRAII();
   return impl::getNumHardwareThreadsInBlock();
 }
+
+__attribute__((noinline)) uint32_t __kmpc_get_warp_size() {
+  FunctionTracingRAII();
+  return impl::getWarpSize();
+}
 }
 #pragma omp end declare target
Index: llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
===
--- llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -455,6 +455,8 @@
 __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,)
 __OMP_RTL(__kmpc_syncwarp, false, Void, Int64)
 
+__OMP_RTL(__kmpc_get_warp_size, false, Int32, )
+
 __OMP_RTL(__kmpc_is_generic_main_thread_id, false, Int8, Int32)
 
 __OMP_RTL(__last, false, Void, )
Index: clang/lib/CodeGen/CodeGenModule.cpp
===
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -19,8 +19,7 @@
 #include "CGObjCRuntime.h"
 #include "CGOpenCLRuntime.h"
 #include "CGOpenMPRuntime.h"
-#include "CGOpenMPRuntimeAMDGCN.h"
-#include "CGOpenMPRuntimeNVPTX.h"
+#include "CGOpenMPRuntimeGPU.h"
 #include "CodeGenFunction.h"
 #include "CodeGenPGO.h"
 #include "ConstantEmitter.h"
@@ -244,14 +243,10 @@
   switch (getTriple().getArch()) {
   case llvm::Triple::nvptx:
   case llvm::Triple::nvptx64:
-assert(getLangOpts().OpenMPIsDevice &&
-   "OpenMP NVPTX is only prepared to deal with device code.");
-OpenMPRuntime.reset(new CGOpenMPRuntimeNVPTX(*this));
-break;
   case llvm::Triple::amdgcn:
 assert(getLangOpts().OpenMPIsDevice &&
-   "OpenMP AMDGCN is only prepared to deal with device code.");
-OpenMPRuntime.reset(new CGOpenMPRuntimeAMDGCN(*this));
+   "OpenMP AMDGPU/NVPTX is only prepared to deal with device code.");
+OpenMPRuntime.reset(new CGOpenMPRuntimeGPU(*this));
 break;
   default:
 if (LangOpts.OpenMPSimd)
Index: clang/lib/CodeGen/CMakeLists.txt
===
--- clang/lib/CodeGen/CMakeLists.txt
+++ clang/lib/CodeGen/CMakeLists.txt
@@ -59,9 +59,7 @@
   CGObjCRuntime.cpp
   CGOpenCLRuntime.cpp
   CGOpenMPRuntime.cpp
-  CGOpenMPRuntimeAMDGCN.cpp
   CGOpenMPRuntimeGPU.cpp
-  CGOpenMPRuntimeNVPTX.cpp
   CGRecordLayoutBuilder.cpp
   CGStmt.cpp
   CGStmtOpenMP.cpp
Index: clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
===
--- clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
+++ /dev/null
@@ -1,40 +0,0 @@
-//===- CGOpenMPRuntimeNVPTX.h - Interface to OpenMP NVPTX Runtimes ===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===--===//
-//
-// This provides a class for OpenMP runtime code generation specialized to NVPTX
-// targets from generalized CGOpenMPRuntimeGPU class.
-//
-//===--===//
-
-#ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H
-#define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H
-
-#include "CGOpenMPRuntime.h"
-#include "CGOpenMPRuntimeGPU.h"
-#include "CodeGenFunction.h"
-#include "clang/AST/StmtOpenMP.h"
-
-namespace clang {
-namespace CodeGen {
-
-class CGOpenMPRuntimeNVPTX final : public CGOpenMPRuntimeGPU {
-
-public:
-  explicit CGOpenMPRuntimeNVPTX(CodeGenModule );
-
-  /// Get the GPU warp size.
-  llvm::Value *getGPUWarpSize(CodeGenFunction ) override;
-
-  /// Get the id of the current thread on the GPU.
-  llvm::Value *getGPUThreadID(CodeGenFunction ) override;
-};
-
-} // CodeGen namespace.
-} // clang namespace.
-
-#endif // LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H
Index: clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
===
--- 

[PATCH] D113421: [clang][openmp][NFC] Remove arch-specific CGOpenMPRuntimeGPU files

2021-11-08 Thread Jon Chesterfield via Phabricator via cfe-commits
JonChesterfield added a comment.

Example of the function as opposed to intrinsics is 
__kmpc_get_hardware_num_threads_in_block from just above where you've modified. 
That corresponds to a function in the device runtime, e.g.

  int __kmpc_get_hardware_num_threads_in_block() {
return get_workgroup_dim(__builtin_amdgcn_workgroup_id_x(),
 __builtin_amdgcn_grid_size_x(),
 __builtin_amdgcn_workgroup_size_x());
  }

and

  int __kmpc_get_hardware_num_threads_in_block() {
return __nvvm_read_ptx_sreg_ntid_x();
  }




Comment at: clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp:3968
+
+  if (Triple.isNVPTX()) {
+llvm::Function *F = llvm::Intrinsic::getDeclaration(

This does work. The benefit of adding the functions to the device runtime 
(which contain these intrinsic calls) is we get uniformity of the generated IR, 
modulo the unfortunate addrspace casts, so we can do nice things like pattern 
match on the name of the device runtime function



Comment at: clang/lib/CodeGen/CodeGenModule.cpp:245
   case llvm::Triple::nvptx:
   case llvm::Triple::nvptx64:
 assert(getLangOpts().OpenMPIsDevice &&

Looks like we could fold these cases by renaming the assert


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D113421/new/

https://reviews.llvm.org/D113421

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D113421: [clang][openmp][NFC] Remove arch-specific CGOpenMPRuntimeGPU files

2021-11-08 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert added a comment.

In D113421#3116155 , @tianshilei1992 
wrote:

> I remember at some point we want to just emit function call to get those 
> information like thread id.

Yes, no intrinsics please. Add `__kmpc_get_warp_size` and similar functions to 
both runtimes and then emit calls to them instead.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D113421/new/

https://reviews.llvm.org/D113421

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D113421: [clang][openmp][NFC] Remove arch-specific CGOpenMPRuntimeGPU files

2021-11-08 Thread Shilei Tian via Phabricator via cfe-commits
tianshilei1992 added a comment.

I remember at some point we want to just emit function call to get those 
information like thread id.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D113421/new/

https://reviews.llvm.org/D113421

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D113421: [clang][openmp][NFC] Remove arch-specific CGOpenMPRuntimeGPU files

2021-11-08 Thread Atmn Patel via Phabricator via cfe-commits
atmnpatel created this revision.
atmnpatel added reviewers: jdoerfert, JonChesterfield, tianshilei1992.
Herald added subscribers: asavonic, guansong, yaxunl, mgorny, jvesely, 
jholewinski.
atmnpatel requested review of this revision.
Herald added subscribers: cfe-commits, sstefan1.
Herald added a project: clang.

The existing CGOpenMPRuntimeAMDGCN and CGOpenMPRuntimeNVPTX classes are
just code bloat. By removing them, the codebase gets a bit cleaner.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D113421

Files:
  clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp
  clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.h
  clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
  clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
  clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
  clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
  clang/lib/CodeGen/CMakeLists.txt
  clang/lib/CodeGen/CodeGenModule.cpp

Index: clang/lib/CodeGen/CodeGenModule.cpp
===
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -19,8 +19,7 @@
 #include "CGObjCRuntime.h"
 #include "CGOpenCLRuntime.h"
 #include "CGOpenMPRuntime.h"
-#include "CGOpenMPRuntimeAMDGCN.h"
-#include "CGOpenMPRuntimeNVPTX.h"
+#include "CGOpenMPRuntimeGPU.h"
 #include "CodeGenFunction.h"
 #include "CodeGenPGO.h"
 #include "ConstantEmitter.h"
@@ -246,12 +245,12 @@
   case llvm::Triple::nvptx64:
 assert(getLangOpts().OpenMPIsDevice &&
"OpenMP NVPTX is only prepared to deal with device code.");
-OpenMPRuntime.reset(new CGOpenMPRuntimeNVPTX(*this));
+OpenMPRuntime.reset(new CGOpenMPRuntimeGPU(*this));
 break;
   case llvm::Triple::amdgcn:
 assert(getLangOpts().OpenMPIsDevice &&
"OpenMP AMDGCN is only prepared to deal with device code.");
-OpenMPRuntime.reset(new CGOpenMPRuntimeAMDGCN(*this));
+OpenMPRuntime.reset(new CGOpenMPRuntimeGPU(*this));
 break;
   default:
 if (LangOpts.OpenMPSimd)
Index: clang/lib/CodeGen/CMakeLists.txt
===
--- clang/lib/CodeGen/CMakeLists.txt
+++ clang/lib/CodeGen/CMakeLists.txt
@@ -59,9 +59,7 @@
   CGObjCRuntime.cpp
   CGOpenCLRuntime.cpp
   CGOpenMPRuntime.cpp
-  CGOpenMPRuntimeAMDGCN.cpp
   CGOpenMPRuntimeGPU.cpp
-  CGOpenMPRuntimeNVPTX.cpp
   CGRecordLayoutBuilder.cpp
   CGStmt.cpp
   CGStmtOpenMP.cpp
Index: clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
===
--- clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
+++ /dev/null
@@ -1,40 +0,0 @@
-//===- CGOpenMPRuntimeNVPTX.h - Interface to OpenMP NVPTX Runtimes ===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===--===//
-//
-// This provides a class for OpenMP runtime code generation specialized to NVPTX
-// targets from generalized CGOpenMPRuntimeGPU class.
-//
-//===--===//
-
-#ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H
-#define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H
-
-#include "CGOpenMPRuntime.h"
-#include "CGOpenMPRuntimeGPU.h"
-#include "CodeGenFunction.h"
-#include "clang/AST/StmtOpenMP.h"
-
-namespace clang {
-namespace CodeGen {
-
-class CGOpenMPRuntimeNVPTX final : public CGOpenMPRuntimeGPU {
-
-public:
-  explicit CGOpenMPRuntimeNVPTX(CodeGenModule );
-
-  /// Get the GPU warp size.
-  llvm::Value *getGPUWarpSize(CodeGenFunction ) override;
-
-  /// Get the id of the current thread on the GPU.
-  llvm::Value *getGPUThreadID(CodeGenFunction ) override;
-};
-
-} // CodeGen namespace.
-} // clang namespace.
-
-#endif // LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H
Index: clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
===
--- clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ /dev/null
@@ -1,48 +0,0 @@
-//=== CGOpenMPRuntimeNVPTX.cpp - Interface to OpenMP NVPTX Runtimes ---===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===--===//
-//
-// This provides a class for OpenMP runtime code generation specialized to NVPTX
-// targets from generalized CGOpenMPRuntimeGPU class.
-//
-//===--===//
-
-#include "CGOpenMPRuntimeNVPTX.h"
-#include "CGOpenMPRuntimeGPU.h"
-#include "CodeGenFunction.h"
-#include "clang/AST/Attr.h"
-#include "clang/AST/DeclOpenMP.h"
-#include "clang/AST/StmtOpenMP.h"
-#include "clang/AST/StmtVisitor.h"
-#include "clang/Basic/Cuda.h"
-#include