[PATCH] D108380: [openmp][nfc] Refactor GridValues

2021-08-19 Thread Jon Chesterfield via Phabricator via cfe-commits
JonChesterfield created this revision.
JonChesterfield added reviewers: jdoerfert, dpalermo, gregrodgers, ronlieb, 
tianshilei1992, grokos, atmnpatel.
Herald added subscribers: kerbowa, guansong, yaxunl, nhaehnle, jvesely, 
jholewinski.
JonChesterfield requested review of this revision.
Herald added subscribers: llvm-commits, cfe-commits, sstefan1.
Herald added projects: clang, LLVM.

Remove redundant fields and replace pointer with virtual function

Of fourteen fields, three are dead and four can be computed from the
remainder. This leaves a couple of currently dead fields in place as
they are expected to be used from the deviceRTL shortly.

This change leaves the new methods in the same location in the struct
as the previous values and includes static asserts that the values are
unchanged. This is for ease of verifying the review, methods will be
grouped together and the static asserts dropped post commit.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D108380

Files:
  clang/include/clang/Basic/TargetInfo.h
  clang/lib/Basic/Targets/AMDGPU.cpp
  clang/lib/Basic/Targets/AMDGPU.h
  clang/lib/Basic/Targets/NVPTX.cpp
  clang/lib/Basic/Targets/NVPTX.h
  clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
  llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h

Index: llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
===
--- llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
+++ llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
@@ -62,19 +62,21 @@
   const unsigned GV_Slot_Size;
   /// The default value of maximum number of threads in a worker warp.
   const unsigned GV_Warp_Size;
-  /// Alternate warp size for some AMDGCN architectures. Same as GV_Warp_Size
-  /// for NVPTX.
-  const unsigned GV_Warp_Size_32;
+
   /// The number of bits required to represent the max number of threads in warp
-  const unsigned GV_Warp_Size_Log2;
-  /// GV_Warp_Size * GV_Slot_Size,
-  const unsigned GV_Warp_Slot_Size;
+  constexpr unsigned warpSizeLog2() const { return log2(GV_Warp_Size); }
+
+  constexpr unsigned warpSlotSize() const {
+return GV_Warp_Size * GV_Slot_Size;
+  }
+
+  constexpr unsigned warpSizeLog2Mask() const {
+return ~0u >> (32u - warpSizeLog2());
+  }
+
   /// the maximum number of teams.
   const unsigned GV_Max_Teams;
-  /// Global Memory Alignment
-  const unsigned GV_Mem_Align;
-  /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
-  const unsigned GV_Warp_Size_Log2_Mask;
+
   // An alternative to the heavy data sharing infrastructure that uses global
   // memory is one that uses device __shared__ memory.  The amount of such space
   // (in bytes) reserved by the OpenMP runtime is noted here.
@@ -83,49 +85,55 @@
   const unsigned GV_Max_WG_Size;
   // The default maximum team size for a working group
   const unsigned GV_Default_WG_Size;
-  // This is GV_Max_WG_Size / GV_WarpSize. 32 for NVPTX and 16 for AMDGCN.
-  const unsigned GV_Max_Warp_Number;
-  /// The slot size that should be reserved for a working warp.
-  /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
-  const unsigned GV_Warp_Size_Log2_MaskL;
+
+  constexpr unsigned maxWarpNumber() const {
+return GV_Max_WG_Size / GV_Warp_Size;
+  }
+
+private:
+  static constexpr unsigned log2(unsigned I) {
+// assumes I is nonzero power of 2
+// reimplemented here for use from freestanding devicertl
+unsigned R = 0;
+while (I >>= 1) {
+  R++;
+}
+return R;
+  }
 };
 
 /// For AMDGPU GPUs
 static constexpr GV AMDGPUGridValues = {
-448,   // GV_Threads
-256,   // GV_Slot_Size
-64,// GV_Warp_Size
-32,// GV_Warp_Size_32
-6, // GV_Warp_Size_Log2
-64 * 256,  // GV_Warp_Slot_Size
-128,   // GV_Max_Teams
-256,   // GV_Mem_Align
-63,// GV_Warp_Size_Log2_Mask
-896,   // GV_SimpleBufferSize
-1024,  // GV_Max_WG_Size,
-256,   // GV_Defaut_WG_Size
-1024 / 64, // GV_Max_WG_Size / GV_WarpSize
-63 // GV_Warp_Size_Log2_MaskL
+448,  // GV_Threads
+256,  // GV_Slot_Size
+64,   // GV_Warp_Size
+128,  // GV_Max_Teams
+896,  // GV_SimpleBufferSize
+1024, // GV_Max_WG_Size,
+256,  // GV_Default_WG_Size
 };
 
+static_assert(6 == AMDGPUGridValues.warpSizeLog2(), "");
+static_assert(64 * 256 == AMDGPUGridValues.warpSlotSize(), "");
+static_assert(63 == AMDGPUGridValues.warpSizeLog2Mask(), "");
+static_assert(1024 / 64 == AMDGPUGridValues.maxWarpNumber(), "");
+
 /// For Nvidia GPUs
 static constexpr GV NVPTXGridValues = {
-992,   // GV_Threads
-256,   // GV_Slot_Size
-32,// GV_Warp_Size
-32,// GV_Warp_Size_32
-5, // GV_Warp_Size_Log2
-32 * 256,  // GV_Warp_Slot_Size
-1024,  // GV_Max_Teams
-256,   // GV_Mem_Align
-(~0u >> (32 - 5)), // GV_Warp_Size_Log2_Mask
-896,   // GV_Si

[PATCH] D108380: [openmp][nfc] Refactor GridValues

2021-08-19 Thread Jon Chesterfield via Phabricator via cfe-commits
JonChesterfield updated this revision to Diff 367513.
JonChesterfield added a comment.

- reorder field


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D108380/new/

https://reviews.llvm.org/D108380

Files:
  clang/include/clang/Basic/TargetInfo.h
  clang/lib/Basic/Targets/AMDGPU.cpp
  clang/lib/Basic/Targets/AMDGPU.h
  clang/lib/Basic/Targets/NVPTX.cpp
  clang/lib/Basic/Targets/NVPTX.h
  clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
  llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h

Index: llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
===
--- llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
+++ llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
@@ -62,19 +62,21 @@
   const unsigned GV_Slot_Size;
   /// The default value of maximum number of threads in a worker warp.
   const unsigned GV_Warp_Size;
-  /// Alternate warp size for some AMDGCN architectures. Same as GV_Warp_Size
-  /// for NVPTX.
-  const unsigned GV_Warp_Size_32;
+
   /// The number of bits required to represent the max number of threads in warp
-  const unsigned GV_Warp_Size_Log2;
-  /// GV_Warp_Size * GV_Slot_Size,
-  const unsigned GV_Warp_Slot_Size;
+  constexpr unsigned warpSizeLog2() const { return log2(GV_Warp_Size); }
+
+  constexpr unsigned warpSlotSize() const {
+return GV_Warp_Size * GV_Slot_Size;
+  }
+
   /// the maximum number of teams.
   const unsigned GV_Max_Teams;
-  /// Global Memory Alignment
-  const unsigned GV_Mem_Align;
-  /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
-  const unsigned GV_Warp_Size_Log2_Mask;
+
+  constexpr unsigned warpSizeLog2Mask() const {
+return ~0u >> (32u - warpSizeLog2());
+  }
+
   // An alternative to the heavy data sharing infrastructure that uses global
   // memory is one that uses device __shared__ memory.  The amount of such space
   // (in bytes) reserved by the OpenMP runtime is noted here.
@@ -83,49 +85,55 @@
   const unsigned GV_Max_WG_Size;
   // The default maximum team size for a working group
   const unsigned GV_Default_WG_Size;
-  // This is GV_Max_WG_Size / GV_WarpSize. 32 for NVPTX and 16 for AMDGCN.
-  const unsigned GV_Max_Warp_Number;
-  /// The slot size that should be reserved for a working warp.
-  /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
-  const unsigned GV_Warp_Size_Log2_MaskL;
+
+  constexpr unsigned maxWarpNumber() const {
+return GV_Max_WG_Size / GV_Warp_Size;
+  }
+
+private:
+  static constexpr unsigned log2(unsigned I) {
+// assumes I is nonzero power of 2
+// reimplemented here for use from freestanding devicertl
+unsigned R = 0;
+while (I >>= 1) {
+  R++;
+}
+return R;
+  }
 };
 
 /// For AMDGPU GPUs
 static constexpr GV AMDGPUGridValues = {
-448,   // GV_Threads
-256,   // GV_Slot_Size
-64,// GV_Warp_Size
-32,// GV_Warp_Size_32
-6, // GV_Warp_Size_Log2
-64 * 256,  // GV_Warp_Slot_Size
-128,   // GV_Max_Teams
-256,   // GV_Mem_Align
-63,// GV_Warp_Size_Log2_Mask
-896,   // GV_SimpleBufferSize
-1024,  // GV_Max_WG_Size,
-256,   // GV_Defaut_WG_Size
-1024 / 64, // GV_Max_WG_Size / GV_WarpSize
-63 // GV_Warp_Size_Log2_MaskL
+448,  // GV_Threads
+256,  // GV_Slot_Size
+64,   // GV_Warp_Size
+128,  // GV_Max_Teams
+896,  // GV_SimpleBufferSize
+1024, // GV_Max_WG_Size,
+256,  // GV_Default_WG_Size
 };
 
+static_assert(6 == AMDGPUGridValues.warpSizeLog2(), "");
+static_assert(64 * 256 == AMDGPUGridValues.warpSlotSize(), "");
+static_assert(63 == AMDGPUGridValues.warpSizeLog2Mask(), "");
+static_assert(1024 / 64 == AMDGPUGridValues.maxWarpNumber(), "");
+
 /// For Nvidia GPUs
 static constexpr GV NVPTXGridValues = {
-992,   // GV_Threads
-256,   // GV_Slot_Size
-32,// GV_Warp_Size
-32,// GV_Warp_Size_32
-5, // GV_Warp_Size_Log2
-32 * 256,  // GV_Warp_Slot_Size
-1024,  // GV_Max_Teams
-256,   // GV_Mem_Align
-(~0u >> (32 - 5)), // GV_Warp_Size_Log2_Mask
-896,   // GV_SimpleBufferSize
-1024,  // GV_Max_WG_Size
-128,   // GV_Defaut_WG_Size
-1024 / 32, // GV_Max_WG_Size / GV_WarpSize
-31 // GV_Warp_Size_Log2_MaskL
+992,  // GV_Threads
+256,  // GV_Slot_Size
+32,   // GV_Warp_Size
+1024, // GV_Max_Teams
+896,  // GV_SimpleBufferSize
+1024, // GV_Max_WG_Size
+128,  // GV_Default_WG_Size
 };
 
+static_assert(5 == NVPTXGridValues.warpSizeLog2(), "");
+static_assert(32 * 256 == NVPTXGridValues.warpSlotSize(), "");
+static_assert((~0u >> (32 - 5)) == NVPTXGridValues.warpSizeLog2Mask(), "");
+static_assert(1024 / 32 == NVPTXGridValues.maxWarpNumber(), "");
+
 } // namespace omp
 } // namespace llvm
 
Index: clang/lib/CodeGen/C

[PATCH] D108380: [openmp][nfc] Refactor GridValues

2021-08-19 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert added inline comments.



Comment at: llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h:102
+return R;
+  }
 };

It should be in the device rtl then, no?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D108380/new/

https://reviews.llvm.org/D108380

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D108380: [openmp][nfc] Refactor GridValues

2021-08-19 Thread Jon Chesterfield via Phabricator via cfe-commits
JonChesterfield added inline comments.



Comment at: llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h:102
+return R;
+  }
 };

jdoerfert wrote:
> It should be in the device rtl then, no?
This header is currently used from clang and the (amdgpu, could also be cuda if 
we like) host plugin. Possibly also from llvm. As of D108391 it would be used 
from the devicertl.

The idea is to have a single source of truth for the various magic numbers that 
the pieces should agree on and llvm is the common point on the dependency tree. 
I'm currently interested in that because I want to change some of them for 
gfx10 and have that magically ripple through the components. I'm not totally 
confident that will work out nicely for the host plugin as it has to 
dynamically handle different architectures but I think it'll be good enough.

It's not totally ideal to hand spin a function that is in the math support 
header but I also don't want to try to make various llvm headers 
ffreestanding-safe.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D108380/new/

https://reviews.llvm.org/D108380

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D108380: [openmp][nfc] Refactor GridValues

2021-08-19 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert added inline comments.



Comment at: llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h:102
+return R;
+  }
 };

JonChesterfield wrote:
> jdoerfert wrote:
> > It should be in the device rtl then, no?
> This header is currently used from clang and the (amdgpu, could also be cuda 
> if we like) host plugin. Possibly also from llvm. As of D108391 it would be 
> used from the devicertl.
> 
> The idea is to have a single source of truth for the various magic numbers 
> that the pieces should agree on and llvm is the common point on the 
> dependency tree. I'm currently interested in that because I want to change 
> some of them for gfx10 and have that magically ripple through the components. 
> I'm not totally confident that will work out nicely for the host plugin as it 
> has to dynamically handle different architectures but I think it'll be good 
> enough.
> 
> It's not totally ideal to hand spin a function that is in the math support 
> header but I also don't want to try to make various llvm headers 
> ffreestanding-safe.
>  It's not totally ideal to hand spin a function that is in the math support 
> header but I also don't want to try to make various llvm headers 
> ffreestanding-safe.

The function is only needed in the device rtl. Put it in the device rtl.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D108380/new/

https://reviews.llvm.org/D108380

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D108380: [openmp][nfc] Refactor GridValues

2021-08-19 Thread Jon Chesterfield via Phabricator via cfe-commits
JonChesterfield added inline comments.



Comment at: llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h:102
+return R;
+  }
 };

jdoerfert wrote:
> JonChesterfield wrote:
> > jdoerfert wrote:
> > > It should be in the device rtl then, no?
> > This header is currently used from clang and the (amdgpu, could also be 
> > cuda if we like) host plugin. Possibly also from llvm. As of D108391 it 
> > would be used from the devicertl.
> > 
> > The idea is to have a single source of truth for the various magic numbers 
> > that the pieces should agree on and llvm is the common point on the 
> > dependency tree. I'm currently interested in that because I want to change 
> > some of them for gfx10 and have that magically ripple through the 
> > components. I'm not totally confident that will work out nicely for the 
> > host plugin as it has to dynamically handle different architectures but I 
> > think it'll be good enough.
> > 
> > It's not totally ideal to hand spin a function that is in the math support 
> > header but I also don't want to try to make various llvm headers 
> > ffreestanding-safe.
> >  It's not totally ideal to hand spin a function that is in the math support 
> > header but I also don't want to try to make various llvm headers 
> > ffreestanding-safe.
> 
> The function is only needed in the device rtl. Put it in the device rtl.
This particular function is only called by warpSizeLog2 which is currently only 
used by CGOpenMPRuntimeGPU. The deviceRTL doesn't call the function. However if 
this header includes the rest of llvm support then it can't call any of the 
others either.

I'm going to drop the log2 accessor (and this function) in favour of two calls 
into math support from CGOpenMPRuntime.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D108380/new/

https://reviews.llvm.org/D108380

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D108380: [openmp][nfc] Refactor GridValues

2021-08-19 Thread Jon Chesterfield via Phabricator via cfe-commits
JonChesterfield updated this revision to Diff 367587.
JonChesterfield added a comment.

- delete log2 accessors per review comments


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D108380/new/

https://reviews.llvm.org/D108380

Files:
  clang/include/clang/Basic/TargetInfo.h
  clang/lib/Basic/Targets/AMDGPU.cpp
  clang/lib/Basic/Targets/AMDGPU.h
  clang/lib/Basic/Targets/NVPTX.cpp
  clang/lib/Basic/Targets/NVPTX.h
  clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
  llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h

Index: llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
===
--- llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
+++ llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
@@ -62,19 +62,14 @@
   const unsigned GV_Slot_Size;
   /// The default value of maximum number of threads in a worker warp.
   const unsigned GV_Warp_Size;
-  /// Alternate warp size for some AMDGCN architectures. Same as GV_Warp_Size
-  /// for NVPTX.
-  const unsigned GV_Warp_Size_32;
-  /// The number of bits required to represent the max number of threads in warp
-  const unsigned GV_Warp_Size_Log2;
-  /// GV_Warp_Size * GV_Slot_Size,
-  const unsigned GV_Warp_Slot_Size;
+
+  constexpr unsigned warpSlotSize() const {
+return GV_Warp_Size * GV_Slot_Size;
+  }
+
   /// the maximum number of teams.
   const unsigned GV_Max_Teams;
-  /// Global Memory Alignment
-  const unsigned GV_Mem_Align;
-  /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
-  const unsigned GV_Warp_Size_Log2_Mask;
+
   // An alternative to the heavy data sharing infrastructure that uses global
   // memory is one that uses device __shared__ memory.  The amount of such space
   // (in bytes) reserved by the OpenMP runtime is noted here.
@@ -83,49 +78,40 @@
   const unsigned GV_Max_WG_Size;
   // The default maximum team size for a working group
   const unsigned GV_Default_WG_Size;
-  // This is GV_Max_WG_Size / GV_WarpSize. 32 for NVPTX and 16 for AMDGCN.
-  const unsigned GV_Max_Warp_Number;
-  /// The slot size that should be reserved for a working warp.
-  /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
-  const unsigned GV_Warp_Size_Log2_MaskL;
+
+  constexpr unsigned maxWarpNumber() const {
+return GV_Max_WG_Size / GV_Warp_Size;
+  }
 };
 
 /// For AMDGPU GPUs
 static constexpr GV AMDGPUGridValues = {
-448,   // GV_Threads
-256,   // GV_Slot_Size
-64,// GV_Warp_Size
-32,// GV_Warp_Size_32
-6, // GV_Warp_Size_Log2
-64 * 256,  // GV_Warp_Slot_Size
-128,   // GV_Max_Teams
-256,   // GV_Mem_Align
-63,// GV_Warp_Size_Log2_Mask
-896,   // GV_SimpleBufferSize
-1024,  // GV_Max_WG_Size,
-256,   // GV_Defaut_WG_Size
-1024 / 64, // GV_Max_WG_Size / GV_WarpSize
-63 // GV_Warp_Size_Log2_MaskL
+448,  // GV_Threads
+256,  // GV_Slot_Size
+64,   // GV_Warp_Size
+128,  // GV_Max_Teams
+896,  // GV_SimpleBufferSize
+1024, // GV_Max_WG_Size,
+256,  // GV_Default_WG_Size
 };
 
+static_assert(64 * 256 == AMDGPUGridValues.warpSlotSize(), "");
+static_assert(1024 / 64 == AMDGPUGridValues.maxWarpNumber(), "");
+
 /// For Nvidia GPUs
 static constexpr GV NVPTXGridValues = {
-992,   // GV_Threads
-256,   // GV_Slot_Size
-32,// GV_Warp_Size
-32,// GV_Warp_Size_32
-5, // GV_Warp_Size_Log2
-32 * 256,  // GV_Warp_Slot_Size
-1024,  // GV_Max_Teams
-256,   // GV_Mem_Align
-(~0u >> (32 - 5)), // GV_Warp_Size_Log2_Mask
-896,   // GV_SimpleBufferSize
-1024,  // GV_Max_WG_Size
-128,   // GV_Defaut_WG_Size
-1024 / 32, // GV_Max_WG_Size / GV_WarpSize
-31 // GV_Warp_Size_Log2_MaskL
+992,  // GV_Threads
+256,  // GV_Slot_Size
+32,   // GV_Warp_Size
+1024, // GV_Max_Teams
+896,  // GV_SimpleBufferSize
+1024, // GV_Max_WG_Size
+128,  // GV_Default_WG_Size
 };
 
+static_assert(32 * 256 == NVPTXGridValues.warpSlotSize(), "");
+static_assert(1024 / 32 == NVPTXGridValues.maxWarpNumber(), "");
+
 } // namespace omp
 } // namespace llvm
 
Index: clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
===
--- clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -22,6 +22,7 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
 #include "llvm/IR/IntrinsicsNVPTX.h"
+#include "llvm/Support/MathExtras.h"
 
 using namespace clang;
 using namespace CodeGen;
@@ -106,8 +107,7 @@
 /// is the same for all known NVPTX architectures.
 enum MachineConfiguration : unsigned {
   /// See "llvm/Frontend/OpenMP/OMPGridValues.h" for various related target
-  /// specific Grid Values like GV_Warp_Size, GV

[PATCH] D108380: [openmp][nfc] Refactor GridValues

2021-08-19 Thread Jon Chesterfield via Phabricator via cfe-commits
JonChesterfield updated this revision to Diff 367589.
JonChesterfield added a comment.

- whitespace, drop asserts


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D108380/new/

https://reviews.llvm.org/D108380

Files:
  clang/include/clang/Basic/TargetInfo.h
  clang/lib/Basic/Targets/AMDGPU.cpp
  clang/lib/Basic/Targets/AMDGPU.h
  clang/lib/Basic/Targets/NVPTX.cpp
  clang/lib/Basic/Targets/NVPTX.h
  clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
  llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h

Index: llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
===
--- llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
+++ llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
@@ -62,19 +62,13 @@
   const unsigned GV_Slot_Size;
   /// The default value of maximum number of threads in a worker warp.
   const unsigned GV_Warp_Size;
-  /// Alternate warp size for some AMDGCN architectures. Same as GV_Warp_Size
-  /// for NVPTX.
-  const unsigned GV_Warp_Size_32;
-  /// The number of bits required to represent the max number of threads in warp
-  const unsigned GV_Warp_Size_Log2;
-  /// GV_Warp_Size * GV_Slot_Size,
-  const unsigned GV_Warp_Slot_Size;
+
+  constexpr unsigned warpSlotSize() const {
+return GV_Warp_Size * GV_Slot_Size;
+  }
+
   /// the maximum number of teams.
   const unsigned GV_Max_Teams;
-  /// Global Memory Alignment
-  const unsigned GV_Mem_Align;
-  /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
-  const unsigned GV_Warp_Size_Log2_Mask;
   // An alternative to the heavy data sharing infrastructure that uses global
   // memory is one that uses device __shared__ memory.  The amount of such space
   // (in bytes) reserved by the OpenMP runtime is noted here.
@@ -83,47 +77,32 @@
   const unsigned GV_Max_WG_Size;
   // The default maximum team size for a working group
   const unsigned GV_Default_WG_Size;
-  // This is GV_Max_WG_Size / GV_WarpSize. 32 for NVPTX and 16 for AMDGCN.
-  const unsigned GV_Max_Warp_Number;
-  /// The slot size that should be reserved for a working warp.
-  /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
-  const unsigned GV_Warp_Size_Log2_MaskL;
+
+  constexpr unsigned maxWarpNumber() const {
+return GV_Max_WG_Size / GV_Warp_Size;
+  }
 };
 
 /// For AMDGPU GPUs
 static constexpr GV AMDGPUGridValues = {
-448,   // GV_Threads
-256,   // GV_Slot_Size
-64,// GV_Warp_Size
-32,// GV_Warp_Size_32
-6, // GV_Warp_Size_Log2
-64 * 256,  // GV_Warp_Slot_Size
-128,   // GV_Max_Teams
-256,   // GV_Mem_Align
-63,// GV_Warp_Size_Log2_Mask
-896,   // GV_SimpleBufferSize
-1024,  // GV_Max_WG_Size,
-256,   // GV_Defaut_WG_Size
-1024 / 64, // GV_Max_WG_Size / GV_WarpSize
-63 // GV_Warp_Size_Log2_MaskL
+448,  // GV_Threads
+256,  // GV_Slot_Size
+64,   // GV_Warp_Size
+128,  // GV_Max_Teams
+896,  // GV_SimpleBufferSize
+1024, // GV_Max_WG_Size,
+256,  // GV_Default_WG_Size
 };
 
 /// For Nvidia GPUs
 static constexpr GV NVPTXGridValues = {
-992,   // GV_Threads
-256,   // GV_Slot_Size
-32,// GV_Warp_Size
-32,// GV_Warp_Size_32
-5, // GV_Warp_Size_Log2
-32 * 256,  // GV_Warp_Slot_Size
-1024,  // GV_Max_Teams
-256,   // GV_Mem_Align
-(~0u >> (32 - 5)), // GV_Warp_Size_Log2_Mask
-896,   // GV_SimpleBufferSize
-1024,  // GV_Max_WG_Size
-128,   // GV_Defaut_WG_Size
-1024 / 32, // GV_Max_WG_Size / GV_WarpSize
-31 // GV_Warp_Size_Log2_MaskL
+992,  // GV_Threads
+256,  // GV_Slot_Size
+32,   // GV_Warp_Size
+1024, // GV_Max_Teams
+896,  // GV_SimpleBufferSize
+1024, // GV_Max_WG_Size
+128,  // GV_Default_WG_Size
 };
 
 } // namespace omp
Index: clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
===
--- clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -22,6 +22,7 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
 #include "llvm/IR/IntrinsicsNVPTX.h"
+#include "llvm/Support/MathExtras.h"
 
 using namespace clang;
 using namespace CodeGen;
@@ -106,8 +107,7 @@
 /// is the same for all known NVPTX architectures.
 enum MachineConfiguration : unsigned {
   /// See "llvm/Frontend/OpenMP/OMPGridValues.h" for various related target
-  /// specific Grid Values like GV_Warp_Size, GV_Warp_Size_Log2,
-  /// and GV_Warp_Size_Log2_Mask.
+  /// specific Grid Values like GV_Warp_Size, GV_Slot_Size
 
   /// Global memory alignment for performance.
   GlobalMemoryAlignment = 128,
@@ -535,7 +535,8 @@
 /// on the NVPTX device, to generate more efficient code.
 static llvm::Value *getNVPTXWarpID

[PATCH] D108380: [openmp][nfc] Refactor GridValues

2021-08-20 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert accepted this revision.
jdoerfert added a comment.
This revision is now accepted and ready to land.

LG


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D108380/new/

https://reviews.llvm.org/D108380

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D108380: [openmp][nfc] Refactor GridValues

2021-08-20 Thread Jon Chesterfield via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG2a47a84b4011: [openmp][nfc] Refactor GridValues (authored by 
JonChesterfield).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D108380/new/

https://reviews.llvm.org/D108380

Files:
  clang/include/clang/Basic/TargetInfo.h
  clang/lib/Basic/Targets/AMDGPU.cpp
  clang/lib/Basic/Targets/AMDGPU.h
  clang/lib/Basic/Targets/NVPTX.cpp
  clang/lib/Basic/Targets/NVPTX.h
  clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
  llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h

Index: llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
===
--- llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
+++ llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
@@ -62,19 +62,13 @@
   const unsigned GV_Slot_Size;
   /// The default value of maximum number of threads in a worker warp.
   const unsigned GV_Warp_Size;
-  /// Alternate warp size for some AMDGCN architectures. Same as GV_Warp_Size
-  /// for NVPTX.
-  const unsigned GV_Warp_Size_32;
-  /// The number of bits required to represent the max number of threads in warp
-  const unsigned GV_Warp_Size_Log2;
-  /// GV_Warp_Size * GV_Slot_Size,
-  const unsigned GV_Warp_Slot_Size;
+
+  constexpr unsigned warpSlotSize() const {
+return GV_Warp_Size * GV_Slot_Size;
+  }
+
   /// the maximum number of teams.
   const unsigned GV_Max_Teams;
-  /// Global Memory Alignment
-  const unsigned GV_Mem_Align;
-  /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
-  const unsigned GV_Warp_Size_Log2_Mask;
   // An alternative to the heavy data sharing infrastructure that uses global
   // memory is one that uses device __shared__ memory.  The amount of such space
   // (in bytes) reserved by the OpenMP runtime is noted here.
@@ -83,47 +77,32 @@
   const unsigned GV_Max_WG_Size;
   // The default maximum team size for a working group
   const unsigned GV_Default_WG_Size;
-  // This is GV_Max_WG_Size / GV_WarpSize. 32 for NVPTX and 16 for AMDGCN.
-  const unsigned GV_Max_Warp_Number;
-  /// The slot size that should be reserved for a working warp.
-  /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
-  const unsigned GV_Warp_Size_Log2_MaskL;
+
+  constexpr unsigned maxWarpNumber() const {
+return GV_Max_WG_Size / GV_Warp_Size;
+  }
 };
 
 /// For AMDGPU GPUs
 static constexpr GV AMDGPUGridValues = {
-448,   // GV_Threads
-256,   // GV_Slot_Size
-64,// GV_Warp_Size
-32,// GV_Warp_Size_32
-6, // GV_Warp_Size_Log2
-64 * 256,  // GV_Warp_Slot_Size
-128,   // GV_Max_Teams
-256,   // GV_Mem_Align
-63,// GV_Warp_Size_Log2_Mask
-896,   // GV_SimpleBufferSize
-1024,  // GV_Max_WG_Size,
-256,   // GV_Defaut_WG_Size
-1024 / 64, // GV_Max_WG_Size / GV_WarpSize
-63 // GV_Warp_Size_Log2_MaskL
+448,  // GV_Threads
+256,  // GV_Slot_Size
+64,   // GV_Warp_Size
+128,  // GV_Max_Teams
+896,  // GV_SimpleBufferSize
+1024, // GV_Max_WG_Size,
+256,  // GV_Default_WG_Size
 };
 
 /// For Nvidia GPUs
 static constexpr GV NVPTXGridValues = {
-992,   // GV_Threads
-256,   // GV_Slot_Size
-32,// GV_Warp_Size
-32,// GV_Warp_Size_32
-5, // GV_Warp_Size_Log2
-32 * 256,  // GV_Warp_Slot_Size
-1024,  // GV_Max_Teams
-256,   // GV_Mem_Align
-(~0u >> (32 - 5)), // GV_Warp_Size_Log2_Mask
-896,   // GV_SimpleBufferSize
-1024,  // GV_Max_WG_Size
-128,   // GV_Defaut_WG_Size
-1024 / 32, // GV_Max_WG_Size / GV_WarpSize
-31 // GV_Warp_Size_Log2_MaskL
+992,  // GV_Threads
+256,  // GV_Slot_Size
+32,   // GV_Warp_Size
+1024, // GV_Max_Teams
+896,  // GV_SimpleBufferSize
+1024, // GV_Max_WG_Size
+128,  // GV_Default_WG_Size
 };
 
 } // namespace omp
Index: clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
===
--- clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -22,6 +22,7 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
 #include "llvm/IR/IntrinsicsNVPTX.h"
+#include "llvm/Support/MathExtras.h"
 
 using namespace clang;
 using namespace CodeGen;
@@ -106,8 +107,7 @@
 /// is the same for all known NVPTX architectures.
 enum MachineConfiguration : unsigned {
   /// See "llvm/Frontend/OpenMP/OMPGridValues.h" for various related target
-  /// specific Grid Values like GV_Warp_Size, GV_Warp_Size_Log2,
-  /// and GV_Warp_Size_Log2_Mask.
+  /// specific Grid Values like GV_Warp_Size, GV_Slot_Size
 
   /// Global memory alignment for performance.
   GlobalMemoryAlignment = 128,
@

[PATCH] D108380: [openmp][nfc] Refactor GridValues

2021-08-20 Thread Jon Chesterfield via Phabricator via cfe-commits
JonChesterfield added a comment.

Failed an nvptx codegen test (maybe the change to calculate log2 at runtime), 
currently away from my desk but will revert when I get back unless beaten to it.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D108380/new/

https://reviews.llvm.org/D108380

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D108380: [openmp][nfc] Refactor GridValues

2021-08-23 Thread Jon Chesterfield via Phabricator via cfe-commits
JonChesterfield added inline comments.



Comment at: clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp:551
+  llvm::Log2_32(CGF.getTarget().getGridValue().GV_Warp_Size);
+  unsigned LaneIDMask = ~0 >> (32u - LaneIDBits);
   auto &RT = static_cast(CGF.CGM.getOpenMPRuntime());

Bug is here. `~0 >> 27u == -1` (bad) and `~0u >> 27u == 31` (good). Win for 
exact codegen tests.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D108380/new/

https://reviews.llvm.org/D108380

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D108380: [openmp][nfc] Refactor GridValues

2021-08-23 Thread Jon Chesterfield via Phabricator via cfe-commits
JonChesterfield updated this revision to Diff 368100.
JonChesterfield added a comment.

- require unsigned shift


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D108380/new/

https://reviews.llvm.org/D108380

Files:
  clang/include/clang/Basic/TargetInfo.h
  clang/lib/Basic/Targets/AMDGPU.cpp
  clang/lib/Basic/Targets/AMDGPU.h
  clang/lib/Basic/Targets/NVPTX.cpp
  clang/lib/Basic/Targets/NVPTX.h
  clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
  llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h

Index: llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
===
--- llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
+++ llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
@@ -62,19 +62,13 @@
   const unsigned GV_Slot_Size;
   /// The default value of maximum number of threads in a worker warp.
   const unsigned GV_Warp_Size;
-  /// Alternate warp size for some AMDGCN architectures. Same as GV_Warp_Size
-  /// for NVPTX.
-  const unsigned GV_Warp_Size_32;
-  /// The number of bits required to represent the max number of threads in warp
-  const unsigned GV_Warp_Size_Log2;
-  /// GV_Warp_Size * GV_Slot_Size,
-  const unsigned GV_Warp_Slot_Size;
+
+  constexpr unsigned warpSlotSize() const {
+return GV_Warp_Size * GV_Slot_Size;
+  }
+
   /// the maximum number of teams.
   const unsigned GV_Max_Teams;
-  /// Global Memory Alignment
-  const unsigned GV_Mem_Align;
-  /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
-  const unsigned GV_Warp_Size_Log2_Mask;
   // An alternative to the heavy data sharing infrastructure that uses global
   // memory is one that uses device __shared__ memory.  The amount of such space
   // (in bytes) reserved by the OpenMP runtime is noted here.
@@ -83,47 +77,32 @@
   const unsigned GV_Max_WG_Size;
   // The default maximum team size for a working group
   const unsigned GV_Default_WG_Size;
-  // This is GV_Max_WG_Size / GV_WarpSize. 32 for NVPTX and 16 for AMDGCN.
-  const unsigned GV_Max_Warp_Number;
-  /// The slot size that should be reserved for a working warp.
-  /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
-  const unsigned GV_Warp_Size_Log2_MaskL;
+
+  constexpr unsigned maxWarpNumber() const {
+return GV_Max_WG_Size / GV_Warp_Size;
+  }
 };
 
 /// For AMDGPU GPUs
 static constexpr GV AMDGPUGridValues = {
-448,   // GV_Threads
-256,   // GV_Slot_Size
-64,// GV_Warp_Size
-32,// GV_Warp_Size_32
-6, // GV_Warp_Size_Log2
-64 * 256,  // GV_Warp_Slot_Size
-128,   // GV_Max_Teams
-256,   // GV_Mem_Align
-63,// GV_Warp_Size_Log2_Mask
-896,   // GV_SimpleBufferSize
-1024,  // GV_Max_WG_Size,
-256,   // GV_Defaut_WG_Size
-1024 / 64, // GV_Max_WG_Size / GV_WarpSize
-63 // GV_Warp_Size_Log2_MaskL
+448,  // GV_Threads
+256,  // GV_Slot_Size
+64,   // GV_Warp_Size
+128,  // GV_Max_Teams
+896,  // GV_SimpleBufferSize
+1024, // GV_Max_WG_Size,
+256,  // GV_Default_WG_Size
 };
 
 /// For Nvidia GPUs
 static constexpr GV NVPTXGridValues = {
-992,   // GV_Threads
-256,   // GV_Slot_Size
-32,// GV_Warp_Size
-32,// GV_Warp_Size_32
-5, // GV_Warp_Size_Log2
-32 * 256,  // GV_Warp_Slot_Size
-1024,  // GV_Max_Teams
-256,   // GV_Mem_Align
-(~0u >> (32 - 5)), // GV_Warp_Size_Log2_Mask
-896,   // GV_SimpleBufferSize
-1024,  // GV_Max_WG_Size
-128,   // GV_Defaut_WG_Size
-1024 / 32, // GV_Max_WG_Size / GV_WarpSize
-31 // GV_Warp_Size_Log2_MaskL
+992,  // GV_Threads
+256,  // GV_Slot_Size
+32,   // GV_Warp_Size
+1024, // GV_Max_Teams
+896,  // GV_SimpleBufferSize
+1024, // GV_Max_WG_Size
+128,  // GV_Default_WG_Size
 };
 
 } // namespace omp
Index: clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
===
--- clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -22,6 +22,7 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
 #include "llvm/IR/IntrinsicsNVPTX.h"
+#include "llvm/Support/MathExtras.h"
 
 using namespace clang;
 using namespace CodeGen;
@@ -106,8 +107,7 @@
 /// is the same for all known NVPTX architectures.
 enum MachineConfiguration : unsigned {
   /// See "llvm/Frontend/OpenMP/OMPGridValues.h" for various related target
-  /// specific Grid Values like GV_Warp_Size, GV_Warp_Size_Log2,
-  /// and GV_Warp_Size_Log2_Mask.
+  /// specific Grid Values like GV_Warp_Size, GV_Slot_Size
 
   /// Global memory alignment for performance.
   GlobalMemoryAlignment = 128,
@@ -535,7 +535,8 @@
 /// on the NVPTX device, to generate more efficient code.
 static llvm::Value *getNVPTXWarpID(C

[PATCH] D108380: [openmp][nfc] Refactor GridValues

2021-08-23 Thread Jon Chesterfield via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rGc2574e63ff71: [openmp][nfc] Refactor GridValues (authored by 
JonChesterfield).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D108380/new/

https://reviews.llvm.org/D108380

Files:
  clang/include/clang/Basic/TargetInfo.h
  clang/lib/Basic/Targets/AMDGPU.cpp
  clang/lib/Basic/Targets/AMDGPU.h
  clang/lib/Basic/Targets/NVPTX.cpp
  clang/lib/Basic/Targets/NVPTX.h
  clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
  llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h

Index: llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
===
--- llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
+++ llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
@@ -62,19 +62,13 @@
   const unsigned GV_Slot_Size;
   /// The default value of maximum number of threads in a worker warp.
   const unsigned GV_Warp_Size;
-  /// Alternate warp size for some AMDGCN architectures. Same as GV_Warp_Size
-  /// for NVPTX.
-  const unsigned GV_Warp_Size_32;
-  /// The number of bits required to represent the max number of threads in warp
-  const unsigned GV_Warp_Size_Log2;
-  /// GV_Warp_Size * GV_Slot_Size,
-  const unsigned GV_Warp_Slot_Size;
+
+  constexpr unsigned warpSlotSize() const {
+return GV_Warp_Size * GV_Slot_Size;
+  }
+
   /// the maximum number of teams.
   const unsigned GV_Max_Teams;
-  /// Global Memory Alignment
-  const unsigned GV_Mem_Align;
-  /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
-  const unsigned GV_Warp_Size_Log2_Mask;
   // An alternative to the heavy data sharing infrastructure that uses global
   // memory is one that uses device __shared__ memory.  The amount of such space
   // (in bytes) reserved by the OpenMP runtime is noted here.
@@ -83,47 +77,32 @@
   const unsigned GV_Max_WG_Size;
   // The default maximum team size for a working group
   const unsigned GV_Default_WG_Size;
-  // This is GV_Max_WG_Size / GV_WarpSize. 32 for NVPTX and 16 for AMDGCN.
-  const unsigned GV_Max_Warp_Number;
-  /// The slot size that should be reserved for a working warp.
-  /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
-  const unsigned GV_Warp_Size_Log2_MaskL;
+
+  constexpr unsigned maxWarpNumber() const {
+return GV_Max_WG_Size / GV_Warp_Size;
+  }
 };
 
 /// For AMDGPU GPUs
 static constexpr GV AMDGPUGridValues = {
-448,   // GV_Threads
-256,   // GV_Slot_Size
-64,// GV_Warp_Size
-32,// GV_Warp_Size_32
-6, // GV_Warp_Size_Log2
-64 * 256,  // GV_Warp_Slot_Size
-128,   // GV_Max_Teams
-256,   // GV_Mem_Align
-63,// GV_Warp_Size_Log2_Mask
-896,   // GV_SimpleBufferSize
-1024,  // GV_Max_WG_Size,
-256,   // GV_Defaut_WG_Size
-1024 / 64, // GV_Max_WG_Size / GV_WarpSize
-63 // GV_Warp_Size_Log2_MaskL
+448,  // GV_Threads
+256,  // GV_Slot_Size
+64,   // GV_Warp_Size
+128,  // GV_Max_Teams
+896,  // GV_SimpleBufferSize
+1024, // GV_Max_WG_Size,
+256,  // GV_Default_WG_Size
 };
 
 /// For Nvidia GPUs
 static constexpr GV NVPTXGridValues = {
-992,   // GV_Threads
-256,   // GV_Slot_Size
-32,// GV_Warp_Size
-32,// GV_Warp_Size_32
-5, // GV_Warp_Size_Log2
-32 * 256,  // GV_Warp_Slot_Size
-1024,  // GV_Max_Teams
-256,   // GV_Mem_Align
-(~0u >> (32 - 5)), // GV_Warp_Size_Log2_Mask
-896,   // GV_SimpleBufferSize
-1024,  // GV_Max_WG_Size
-128,   // GV_Defaut_WG_Size
-1024 / 32, // GV_Max_WG_Size / GV_WarpSize
-31 // GV_Warp_Size_Log2_MaskL
+992,  // GV_Threads
+256,  // GV_Slot_Size
+32,   // GV_Warp_Size
+1024, // GV_Max_Teams
+896,  // GV_SimpleBufferSize
+1024, // GV_Max_WG_Size
+128,  // GV_Default_WG_Size
 };
 
 } // namespace omp
Index: clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
===
--- clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -22,6 +22,7 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
 #include "llvm/IR/IntrinsicsNVPTX.h"
+#include "llvm/Support/MathExtras.h"
 
 using namespace clang;
 using namespace CodeGen;
@@ -106,8 +107,7 @@
 /// is the same for all known NVPTX architectures.
 enum MachineConfiguration : unsigned {
   /// See "llvm/Frontend/OpenMP/OMPGridValues.h" for various related target
-  /// specific Grid Values like GV_Warp_Size, GV_Warp_Size_Log2,
-  /// and GV_Warp_Size_Log2_Mask.
+  /// specific Grid Values like GV_Warp_Size, GV_Slot_Size
 
   /// Global memory alignment for performance.
   GlobalMemoryAlignment = 128,
@@ -535,7 +535,8 @@
 /// on the NVPTX device, to generate