[PATCH] D108380: [openmp][nfc] Refactor GridValues

Jon Chesterfield via Phabricator via cfe-commits Thu, 19 Aug 2021 08:34:28 -0700

JonChesterfield created this revision.
JonChesterfield added reviewers: jdoerfert, dpalermo, gregrodgers, ronlieb, 
tianshilei1992, grokos, atmnpatel.
Herald added subscribers: kerbowa, guansong, yaxunl, nhaehnle, jvesely, 
jholewinski.
JonChesterfield requested review of this revision.
Herald added subscribers: llvm-commits, cfe-commits, sstefan1.
Herald added projects: clang, LLVM.


Remove  redundant fields and replace pointer with virtual function

Of fourteen fields, three are dead and four can be computed from the
remainder. This leaves a couple of currently dead fields in place as
they are expected to be used from the deviceRTL shortly.

This change leaves the new methods in the same location in the struct
as the previous values and includes static asserts that the values are
unchanged. This is for ease of verifying the review, methods will be
grouped together and the static asserts dropped post commit.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D108380

Files:
  clang/include/clang/Basic/TargetInfo.h
  clang/lib/Basic/Targets/AMDGPU.cpp
  clang/lib/Basic/Targets/AMDGPU.h
  clang/lib/Basic/Targets/NVPTX.cpp
  clang/lib/Basic/Targets/NVPTX.h
  clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
  llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h

Index: llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
===================================================================
--- llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
+++ llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
@@ -62,19 +62,21 @@
   const unsigned GV_Slot_Size;
   /// The default value of maximum number of threads in a worker warp.
   const unsigned GV_Warp_Size;
-  /// Alternate warp size for some AMDGCN architectures. Same as GV_Warp_Size
-  /// for NVPTX.
-  const unsigned GV_Warp_Size_32;
+
   /// The number of bits required to represent the max number of threads in warp
-  const unsigned GV_Warp_Size_Log2;
-  /// GV_Warp_Size * GV_Slot_Size,
-  const unsigned GV_Warp_Slot_Size;
+  constexpr unsigned warpSizeLog2() const { return log2(GV_Warp_Size); }
+
+  constexpr unsigned warpSlotSize() const {
+    return GV_Warp_Size * GV_Slot_Size;
+  }
+
+  constexpr unsigned warpSizeLog2Mask() const {
+    return ~0u >> (32u - warpSizeLog2());
+  }
+
   /// the maximum number of teams.
   const unsigned GV_Max_Teams;
-  /// Global Memory Alignment
-  const unsigned GV_Mem_Align;
-  /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
-  const unsigned GV_Warp_Size_Log2_Mask;
+
   // An alternative to the heavy data sharing infrastructure that uses global
   // memory is one that uses device __shared__ memory.  The amount of such space
   // (in bytes) reserved by the OpenMP runtime is noted here.
@@ -83,49 +85,55 @@
   const unsigned GV_Max_WG_Size;
   // The default maximum team size for a working group
   const unsigned GV_Default_WG_Size;
-  // This is GV_Max_WG_Size / GV_WarpSize. 32 for NVPTX and 16 for AMDGCN.
-  const unsigned GV_Max_Warp_Number;
-  /// The slot size that should be reserved for a working warp.
-  /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
-  const unsigned GV_Warp_Size_Log2_MaskL;
+
+  constexpr unsigned maxWarpNumber() const {
+    return GV_Max_WG_Size / GV_Warp_Size;
+  }
+
+private:
+  static constexpr unsigned log2(unsigned I) {
+    // assumes I is nonzero power of 2
+    // reimplemented here for use from freestanding devicertl
+    unsigned R = 0;
+    while (I >>= 1) {
+      R++;
+    }
+    return R;
+  }
 };
 
 /// For AMDGPU GPUs
 static constexpr GV AMDGPUGridValues = {
-    448,       // GV_Threads
-    256,       // GV_Slot_Size
-    64,        // GV_Warp_Size
-    32,        // GV_Warp_Size_32
-    6,         // GV_Warp_Size_Log2
-    64 * 256,  // GV_Warp_Slot_Size
-    128,       // GV_Max_Teams
-    256,       // GV_Mem_Align
-    63,        // GV_Warp_Size_Log2_Mask
-    896,       // GV_SimpleBufferSize
-    1024,      // GV_Max_WG_Size,
-    256,       // GV_Defaut_WG_Size
-    1024 / 64, // GV_Max_WG_Size / GV_WarpSize
-    63         // GV_Warp_Size_Log2_MaskL
+    448,  // GV_Threads
+    256,  // GV_Slot_Size
+    64,   // GV_Warp_Size
+    128,  // GV_Max_Teams
+    896,  // GV_SimpleBufferSize
+    1024, // GV_Max_WG_Size,
+    256,  // GV_Default_WG_Size
 };
 
+static_assert(6 == AMDGPUGridValues.warpSizeLog2(), "");
+static_assert(64 * 256 == AMDGPUGridValues.warpSlotSize(), "");
+static_assert(63 == AMDGPUGridValues.warpSizeLog2Mask(), "");
+static_assert(1024 / 64 == AMDGPUGridValues.maxWarpNumber(), "");
+
 /// For Nvidia GPUs
 static constexpr GV NVPTXGridValues = {
-    992,               // GV_Threads
-    256,               // GV_Slot_Size
-    32,                // GV_Warp_Size
-    32,                // GV_Warp_Size_32
-    5,                 // GV_Warp_Size_Log2
-    32 * 256,          // GV_Warp_Slot_Size
-    1024,              // GV_Max_Teams
-    256,               // GV_Mem_Align
-    (~0u >> (32 - 5)), // GV_Warp_Size_Log2_Mask
-    896,               // GV_SimpleBufferSize
-    1024,              // GV_Max_WG_Size
-    128,               // GV_Defaut_WG_Size
-    1024 / 32,         // GV_Max_WG_Size / GV_WarpSize
-    31                 // GV_Warp_Size_Log2_MaskL
+    992,  // GV_Threads
+    256,  // GV_Slot_Size
+    32,   // GV_Warp_Size
+    1024, // GV_Max_Teams
+    896,  // GV_SimpleBufferSize
+    1024, // GV_Max_WG_Size
+    128,  // GV_Default_WG_Size
 };
 
+static_assert(5 == NVPTXGridValues.warpSizeLog2(), "");
+static_assert(32 * 256 == NVPTXGridValues.warpSlotSize(), "");
+static_assert((~0u >> (32 - 5)) == NVPTXGridValues.warpSizeLog2Mask(), "");
+static_assert(1024 / 32 == NVPTXGridValues.maxWarpNumber(), "");
+
 } // namespace omp
 } // namespace llvm
 
Index: clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -106,8 +106,7 @@
 /// is the same for all known NVPTX architectures.
 enum MachineConfiguration : unsigned {
   /// See "llvm/Frontend/OpenMP/OMPGridValues.h" for various related target
-  /// specific Grid Values like GV_Warp_Size, GV_Warp_Size_Log2,
-  /// and GV_Warp_Size_Log2_Mask.
+  /// specific Grid Values like GV_Warp_Size, GV_Slot_Size
 
   /// Global memory alignment for performance.
   GlobalMemoryAlignment = 128,
@@ -535,7 +534,7 @@
 /// on the NVPTX device, to generate more efficient code.
 static llvm::Value *getNVPTXWarpID(CodeGenFunction &CGF) {
   CGBuilderTy &Bld = CGF.Builder;
-  unsigned LaneIDBits = CGF.getTarget().getGridValue().GV_Warp_Size_Log2;
+  unsigned LaneIDBits = CGF.getTarget().getGridValue().warpSizeLog2();
   auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
   return Bld.CreateAShr(RT.getGPUThreadID(CGF), LaneIDBits, "nvptx_warp_id");
 }
@@ -546,7 +545,7 @@
 static llvm::Value *getNVPTXLaneID(CodeGenFunction &CGF) {
   CGBuilderTy &Bld = CGF.Builder;
   unsigned LaneIDMask =
-      CGF.getContext().getTargetInfo().getGridValue().GV_Warp_Size_Log2_Mask;
+      CGF.getContext().getTargetInfo().getGridValue().warpSizeLog2Mask();
   auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
   return Bld.CreateAnd(RT.getGPUThreadID(CGF), Bld.getInt32(LaneIDMask),
                        "nvptx_lane_id");
Index: clang/lib/Basic/Targets/NVPTX.h
===================================================================
--- clang/lib/Basic/Targets/NVPTX.h
+++ clang/lib/Basic/Targets/NVPTX.h
@@ -147,6 +147,10 @@
     Opts["cl_khr_local_int32_extended_atomics"] = true;
   }
 
+  const llvm::omp::GV &getGridValue() const override {
+    return llvm::omp::NVPTXGridValues;
+  }
+
   /// \returns If a target requires an address within a target specific address
   /// space \p AddressSpace to be converted in order to be used, then return the
   /// corresponding target specific DWARF address space.
Index: clang/lib/Basic/Targets/NVPTX.cpp
===================================================================
--- clang/lib/Basic/Targets/NVPTX.cpp
+++ clang/lib/Basic/Targets/NVPTX.cpp
@@ -16,7 +16,6 @@
 #include "clang/Basic/MacroBuilder.h"
 #include "clang/Basic/TargetBuiltins.h"
 #include "llvm/ADT/StringSwitch.h"
-#include "llvm/Frontend/OpenMP/OMPGridValues.h"
 
 using namespace clang;
 using namespace clang::targets;
@@ -65,7 +64,6 @@
   TLSSupported = false;
   VLASupported = false;
   AddrSpaceMap = &NVPTXAddrSpaceMap;
-  GridValues = &llvm::omp::NVPTXGridValues;
   UseAddrSpaceMapMangling = true;
 
   // Define available target features
Index: clang/lib/Basic/Targets/AMDGPU.h
===================================================================
--- clang/lib/Basic/Targets/AMDGPU.h
+++ clang/lib/Basic/Targets/AMDGPU.h
@@ -359,6 +359,10 @@
     return getLangASFromTargetAS(Constant);
   }
 
+  const llvm::omp::GV &getGridValue() const override {
+    return llvm::omp::AMDGPUGridValues;
+  }
+
   /// \returns Target specific vtbl ptr address space.
   unsigned getVtblPtrAddressSpace() const override {
     return static_cast<unsigned>(Constant);
Index: clang/lib/Basic/Targets/AMDGPU.cpp
===================================================================
--- clang/lib/Basic/Targets/AMDGPU.cpp
+++ clang/lib/Basic/Targets/AMDGPU.cpp
@@ -17,7 +17,6 @@
 #include "clang/Basic/MacroBuilder.h"
 #include "clang/Basic/TargetBuiltins.h"
 #include "llvm/ADT/StringSwitch.h"
-#include "llvm/Frontend/OpenMP/OMPGridValues.h"
 
 using namespace clang;
 using namespace clang::targets;
@@ -335,7 +334,6 @@
                   llvm::AMDGPU::getArchAttrR600(GPUKind)) {
   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
                                         : DataLayoutStringR600);
-  GridValues = &llvm::omp::AMDGPUGridValues;
 
   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
                      !isAMDGCN(Triple));
Index: clang/include/clang/Basic/TargetInfo.h
===================================================================
--- clang/include/clang/Basic/TargetInfo.h
+++ clang/include/clang/Basic/TargetInfo.h
@@ -210,9 +210,6 @@
   unsigned char RegParmMax, SSERegParmMax;
   TargetCXXABI TheCXXABI;
   const LangASMap *AddrSpaceMap;
-  const llvm::omp::GV *GridValues =
-      nullptr; // target-specific GPU grid values that must be
-               // consistent between host RTL (plugin), device RTL, and clang.
 
   mutable StringRef PlatformName;
   mutable VersionTuple PlatformMinVersion;
@@ -1410,10 +1407,10 @@
     return LangAS::Default;
   }
 
-  /// Return a target-specific GPU grid values
-  const llvm::omp::GV &getGridValue() const {
-    assert(GridValues != nullptr && "GridValues not initialized");
-    return *GridValues;
+  // access target-specific GPU grid values that must be consistent between
+  // host RTL (plugin), deviceRTL and clang.
+  virtual const llvm::omp::GV &getGridValue() const {
+    llvm_unreachable("getGridValue not implemented on this target");
   }
 
   /// Retrieve the name of the platform as it is used in the

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D108380: [openmp][nfc] Refactor GridValues

Reply via email to