[PATCH] D31210: [AMDGPU] Add new address space mapping

2017-03-24 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl added a comment.

Hi Tony,

I have already updated with a full diff. Please take a look. Thanks.


https://reviews.llvm.org/D31210



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D31210: [AMDGPU] Add new address space mapping

2017-03-24 Thread Tony Tye via Phabricator via cfe-commits
t-tye requested changes to this revision.
t-tye added a comment.
This revision now requires changes to proceed.

Also please upload as full diff.




Comment at: lib/Basic/Targets.cpp:2026-2069
+  struct AddrSpace {
+unsigned Generic, Global, Local, Constant, Private;
+bool IsGenericZero;
+AddrSpace(bool IsGenericZero_ = false){
+  reset(IsGenericZero_);
+}
+void reset(bool IsGenericZero_) {

Now that the target triple is being used to control the address space mapping, 
the mapping is only being set once. So suggest simplifying this and simply have 
two static arrays for the address spaces (like is done for the data layout), 
and then set AddrSpaceMap to the address of the appropriate array.



Comment at: lib/Basic/Targets.cpp:2119
 
-AddrSpaceMap = 
+AddrSpaceMap = AS.getMap();
 UseAddrSpaceMapMangling = true;

See comment above.


https://reviews.llvm.org/D31210



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D31210: [AMDGPU] Add new address space mapping

2017-03-24 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl updated this revision to Diff 92977.
yaxunl added a comment.

Revised by Tony's suggestions.


https://reviews.llvm.org/D31210

Files:
  lib/Basic/Targets.cpp
  test/CodeGenOpenCL/amdgpu-env-amdgiz.cl

Index: test/CodeGenOpenCL/amdgpu-env-amdgiz.cl
===
--- /dev/null
+++ test/CodeGenOpenCL/amdgpu-env-amdgiz.cl
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 %s -O0 -triple amdgcn -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -O0 -triple amdgcn---opencl -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -O0 -triple amdgcn---amdgiz -emit-llvm -o - | FileCheck -check-prefix=NEW %s
+// RUN: %clang_cc1 %s -O0 -triple amdgcn---amdgizcl -emit-llvm -o - | FileCheck -check-prefix=NEW %s
+
+// CHECK: target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+// NEW: target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+void foo(void) {}
+
Index: lib/Basic/Targets.cpp
===
--- lib/Basic/Targets.cpp
+++ lib/Basic/Targets.cpp
@@ -2002,14 +2002,23 @@
   return llvm::makeArrayRef(GCCRegNames);
 }
 
-static const unsigned AMDGPUAddrSpaceMap[] = {
-  1,// opencl_global
-  3,// opencl_local
-  2,// opencl_constant
-  4,// opencl_generic
-  1,// cuda_device
-  2,// cuda_constant
-  3 // cuda_shared
+static LangAS::Map AMDGPUPrivateIsZeroMap = {
+1,  // opencl_global
+3,  // opencl_local
+2,  // opencl_constant
+4,  // opencl_generic
+1,  // cuda_device
+2,  // cuda_constant
+3   // cuda_shared
+};
+static LangAS::Map AMDGPUGenericIsZeroMap = {
+1,  // opencl_global
+3,  // opencl_local
+4,  // opencl_constant
+0,  // opencl_generic
+1,  // cuda_device
+4,  // cuda_constant
+3   // cuda_shared
 };
 
 // If you edit the description strings, make sure you update
@@ -2019,15 +2028,39 @@
   "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
   "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
 
-static const char *const DataLayoutStringSI =
+static const char *const DataLayoutStringSIPrivateIsZero =
   "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32"
   "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
   "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
 
+static const char *const DataLayoutStringSIGenericIsZero =
+  "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32"
+  "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
+  "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
+
 class AMDGPUTargetInfo final : public TargetInfo {
   static const Builtin::Info BuiltinInfo[];
   static const char * const GCCRegNames[];
 
+  struct AddrSpace {
+unsigned Generic, Global, Local, Constant, Private;
+AddrSpace(bool IsGenericZero_ = false){
+  if (IsGenericZero_) {
+Generic   = 0;
+Global= 1;
+Local = 3;
+Constant  = 4;
+Private   = 5;
+  } else {
+Generic   = 4;
+Global= 1;
+Local = 3;
+Constant  = 2;
+Private   = 0;
+  }
+}
+  };
+
   /// \brief The GPU profiles supported by the AMDGPU target.
   enum GPUKind {
 GK_NONE,
@@ -2054,39 +2087,43 @@
 return TT.getArch() == llvm::Triple::amdgcn;
   }
 
+  static bool isGenericZero(const llvm::Triple ) {
+return TT.getEnvironment() == llvm::Triple::AMDGIZ ||
+TT.getEnvironment() == llvm::Triple::AMDGIZCL;
+  }
 public:
   AMDGPUTargetInfo(const llvm::Triple , const TargetOptions )
 : TargetInfo(Triple) ,
   GPU(isAMDGCN(Triple) ? GK_GFX6 : GK_R600),
   hasFP64(false),
   hasFMAF(false),
   hasLDEXPF(false),
-  hasFullSpeedFP32Denorms(false){
+  hasFullSpeedFP32Denorms(false),
+  AS(isGenericZero(Triple)){
 if (getTriple().getArch() == llvm::Triple::amdgcn) {
   hasFP64 = true;
   hasFMAF = true;
   hasLDEXPF = true;
 }
-
+auto IsGenericZero = isGenericZero(Triple);
 resetDataLayout(getTriple().getArch() == llvm::Triple::amdgcn ?
-DataLayoutStringSI : DataLayoutStringR600);
+(IsGenericZero ? DataLayoutStringSIGenericIsZero :
+DataLayoutStringSIPrivateIsZero)
+: DataLayoutStringR600);
 
-AddrSpaceMap = 
+AddrSpaceMap = IsGenericZero ?  :
+
 UseAddrSpaceMapMangling = true;
   }
 
   uint64_t getPointerWidthV(unsigned AddrSpace) const override {
 if (GPU <= GK_CAYMAN)
   return 32;
 
-switch(AddrSpace) {
-  default:
-return 64;
-  case 0:
-  case 3:
-  case 5:
-return 32;
+if (AddrSpace == AS.Private || AddrSpace == AS.Local) {
+  return 32;
 }
+return 64;
 

[PATCH] D31210: [AMDGPU] Add new address space mapping

2017-03-23 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl added a comment.

Ping! Any further issues? We need this in to move on with Clang codegen for the 
new address space mapping.

Thanks.


https://reviews.llvm.org/D31210



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D31210: [AMDGPU] Add new address space mapping

2017-03-22 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl updated this revision to Diff 92717.
yaxunl edited the summary of this revision.
yaxunl added a comment.

Rename the triple and variable names.


https://reviews.llvm.org/D31210

Files:
  lib/Basic/Targets.cpp
  test/CodeGenOpenCL/amdgpu-env-amdgiz.cl

Index: test/CodeGenOpenCL/amdgpu-env-amdgiz.cl
===
--- /dev/null
+++ test/CodeGenOpenCL/amdgpu-env-amdgiz.cl
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 %s -O0 -triple amdgcn -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -O0 -triple amdgcn---opencl -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -O0 -triple amdgcn---amdgiz -emit-llvm -o - | FileCheck -check-prefix=NEW %s
+// RUN: %clang_cc1 %s -O0 -triple amdgcn---amdgizcl -emit-llvm -o - | FileCheck -check-prefix=NEW %s
+
+// CHECK: target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+// NEW: target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+void foo(void) {}
+
Index: lib/Basic/Targets.cpp
===
--- lib/Basic/Targets.cpp
+++ lib/Basic/Targets.cpp
@@ -2002,16 +2002,6 @@
   return llvm::makeArrayRef(GCCRegNames);
 }
 
-static const unsigned AMDGPUAddrSpaceMap[] = {
-  1,// opencl_global
-  3,// opencl_local
-  2,// opencl_constant
-  4,// opencl_generic
-  1,// cuda_device
-  2,// cuda_constant
-  3 // cuda_shared
-};
-
 // If you edit the description strings, make sure you update
 // getPointerWidthV().
 
@@ -2019,15 +2009,65 @@
   "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
   "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
 
-static const char *const DataLayoutStringSI =
+static const char *const DataLayoutStringSIPrivateIsZero =
   "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32"
   "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
   "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
 
+static const char *const DataLayoutStringSIGenericIsZero =
+  "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32"
+  "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
+  "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
+
 class AMDGPUTargetInfo final : public TargetInfo {
   static const Builtin::Info BuiltinInfo[];
   static const char * const GCCRegNames[];
 
+  struct AddrSpace {
+unsigned Generic, Global, Local, Constant, Private;
+bool IsGenericZero;
+AddrSpace(bool IsGenericZero_ = false){
+  reset(IsGenericZero_);
+}
+void reset(bool IsGenericZero_) {
+  IsGenericZero = IsGenericZero_;
+  if (IsGenericZero) {
+Generic   = 0;
+Global= 1;
+Local = 3;
+Constant  = 4;
+Private   = 5;
+  } else {
+Generic   = 4;
+Global= 1;
+Local = 3;
+Constant  = 2;
+Private   = 0;
+  }
+}
+const LangAS::Map *getMap() {
+  static LangAS::Map PrivateIsZeroMap = {
+  1,  // opencl_global
+  3,  // opencl_local
+  2,  // opencl_constant
+  4,  // opencl_generic
+  1,  // cuda_device
+  2,  // cuda_constant
+  3   // cuda_shared
+  };
+  static LangAS::Map GenericIsZeroMap = {
+  1,  // opencl_global
+  3,  // opencl_local
+  4,  // opencl_constant
+  0,  // opencl_generic
+  1,  // cuda_device
+  4,  // cuda_constant
+  3   // cuda_shared
+  };
+  return IsGenericZero ?  : 
+}
+  };
+
   /// \brief The GPU profiles supported by the AMDGPU target.
   enum GPUKind {
 GK_NONE,
@@ -2061,17 +2101,22 @@
   hasFP64(false),
   hasFMAF(false),
   hasLDEXPF(false),
-  hasFullSpeedFP32Denorms(false){
+  hasFullSpeedFP32Denorms(false) {
 if (getTriple().getArch() == llvm::Triple::amdgcn) {
   hasFP64 = true;
   hasFMAF = true;
   hasLDEXPF = true;
 }
+auto IsGenericZero = Triple.getEnvironment() == llvm::Triple::AMDGIZ ||
+Triple.getEnvironment() == llvm::Triple::AMDGIZCL;
+AS.reset(IsGenericZero);
 
 resetDataLayout(getTriple().getArch() == llvm::Triple::amdgcn ?
-DataLayoutStringSI : DataLayoutStringR600);
+(IsGenericZero ? DataLayoutStringSIGenericIsZero :
+DataLayoutStringSIPrivateIsZero)
+: DataLayoutStringR600);
 
-AddrSpaceMap = 
+AddrSpaceMap = AS.getMap();
 UseAddrSpaceMapMangling = true;
   }
 
@@ -2079,14 +2124,10 @@
 if (GPU <= GK_CAYMAN)
   return 32;
 
-switch(AddrSpace) {
-  default:
-return 64;
-  case 0:
-  case 3:
-  case 5:
-return 32;
+if (AddrSpace == AS.Private || AddrSpace == AS.Local) {
+  

[PATCH] D31210: [AMDGPU] Add new address space mapping

2017-03-22 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl added a comment.

In https://reviews.llvm.org/D31210#707842, @yaxunl wrote:

> In https://reviews.llvm.org/D31210#707832, @rampitec wrote:
>
> > I also do not exactly like names "old" and "new". This implies we are going 
> > to switch to "new" permanently and doing transition. That is not clear yet, 
> > however.
>
>
> How about we call the old address space mapping "Alloca Is Zero" (AIZ) 
> address space mapping, whereas the new address space mapping "Generic Is 
> Zero" (GIZ) address space mapping?
>
> I can change the variable names. Would you suggest to change the target 
> triple environment name too? e.g. amdnas => amdgiz, amdnascl => amdgizcl?


I understand your indication. If we found the maintenance

In https://reviews.llvm.org/D31210#707855, @rampitec wrote:

> In https://reviews.llvm.org/D31210#707842, @yaxunl wrote:
>
> > In https://reviews.llvm.org/D31210#707832, @rampitec wrote:
> >
> > > I also do not exactly like names "old" and "new". This implies we are 
> > > going to switch to "new" permanently and doing transition. That is not 
> > > clear yet, however.
> >
> >
> > How about we call the old address space mapping "Alloca Is Zero" (AIZ) 
> > address space mapping, whereas the new address space mapping "Generic Is 
> > Zero" (GIZ) address space mapping?
> >
> > I can change the variable names. Would you suggest to change the target 
> > triple environment name too? e.g. amdnas => amdgiz, amdnascl => amdgizcl?
>
>
> I think "private is zero" is better than "alloca is zero". The latter may 
> change. I also think it make sense for these to match evn name. What "nas" 
> suffix stands for by the way?


nas means "New Address Space".


https://reviews.llvm.org/D31210



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D31210: [AMDGPU] Add new address space mapping

2017-03-22 Thread Stanislav Mekhanoshin via Phabricator via cfe-commits
rampitec added a comment.

In https://reviews.llvm.org/D31210#707842, @yaxunl wrote:

> In https://reviews.llvm.org/D31210#707832, @rampitec wrote:
>
> > I also do not exactly like names "old" and "new". This implies we are going 
> > to switch to "new" permanently and doing transition. That is not clear yet, 
> > however.
>
>
> How about we call the old address space mapping "Alloca Is Zero" (AIZ) 
> address space mapping, whereas the new address space mapping "Generic Is 
> Zero" (GIZ) address space mapping?
>
> I can change the variable names. Would you suggest to change the target 
> triple environment name too? e.g. amdnas => amdgiz, amdnascl => amdgizcl?


I think "private is zero" is better than "alloca is zero". The latter may 
change. I also think it make sense for these to match evn name. What "nas" 
suffix stands for by the way?


https://reviews.llvm.org/D31210



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D31210: [AMDGPU] Add new address space mapping

2017-03-22 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl added a comment.

In https://reviews.llvm.org/D31210#707832, @rampitec wrote:

> I also do not exactly like names "old" and "new". This implies we are going 
> to switch to "new" permanently and doing transition. That is not clear yet, 
> however.


How about we call the old address space mapping "Alloca Is Zero" (AIZ) address 
space mapping, whereas the new address space mapping "Generic Is Zero" (GIZ) 
address space mapping?

I can change the variable names. Would you suggest to change the target triple 
environment name too? e.g. amdnas => amdgiz, amdnascl => amdgizcl?


https://reviews.llvm.org/D31210



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D31210: [AMDGPU] Add new address space mapping

2017-03-22 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl added inline comments.



Comment at: lib/Basic/Targets.cpp:2029-2040
+  if (UseNew) {
+Generic   = 0;
+Global= 1;
+Local = 3;
+Constant  = 4;
+Private   = 5;
+  } else {

tstellar wrote:
> What are these values used for?
The new address space mapping will be used for all kernel languages, especially 
for C++-based kernel languages like HCC/OpenMP/CUDA. 

We found the old address space mapping is unable to support C++ based kernel 
languages, that's the motivation of this change.

Eventually we will switch to the new address space mapping.

We temporarily keep two address space mapping during the transition,



Comment at: lib/Basic/Targets.cpp:2057
+  4,  // opencl_constant
+  0,  // opencl_generic
+  1,  // cuda_device

tstellar wrote:
> How will the backend deal with the fact that allocas return generic address 
> space pointers?
Matt will upstream a patch which let alloca return a pointer to private address 
space.


https://reviews.llvm.org/D31210



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D31210: [AMDGPU] Add new address space mapping

2017-03-22 Thread Stanislav Mekhanoshin via Phabricator via cfe-commits
rampitec added a comment.

I also do not exactly like names "old" and "new". This implies we are going to 
switch to "new" permanently and doing transition. That is not clear yet, 
however.


https://reviews.llvm.org/D31210



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D31210: [AMDGPU] Add new address space mapping

2017-03-22 Thread Tom Stellard via Phabricator via cfe-commits
tstellar added inline comments.



Comment at: lib/Basic/Targets.cpp:2029-2040
+  if (UseNew) {
+Generic   = 0;
+Global= 1;
+Local = 3;
+Constant  = 4;
+Private   = 5;
+  } else {

What are these values used for?



Comment at: lib/Basic/Targets.cpp:2057
+  4,  // opencl_constant
+  0,  // opencl_generic
+  1,  // cuda_device

How will the backend deal with the fact that allocas return generic address 
space pointers?


https://reviews.llvm.org/D31210



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D31210: [AMDGPU] Add new address space mapping

2017-03-22 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl updated this revision to Diff 92674.
yaxunl edited the summary of this revision.

https://reviews.llvm.org/D31210

Files:
  lib/Basic/Targets.cpp
  test/CodeGenOpenCL/amdgpu-env-amdnas.cl

Index: test/CodeGenOpenCL/amdgpu-env-amdnas.cl
===
--- /dev/null
+++ test/CodeGenOpenCL/amdgpu-env-amdnas.cl
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 %s -O0 -triple amdgcn -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -O0 -triple amdgcn---opencl -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -O0 -triple amdgcn---amdnas -emit-llvm -o - | FileCheck -check-prefix=NEW %s
+// RUN: %clang_cc1 %s -O0 -triple amdgcn---amdnascl -emit-llvm -o - | FileCheck -check-prefix=NEW %s
+
+// CHECK: target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+// NEW: target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+void foo(void) {}
+
Index: lib/Basic/Targets.cpp
===
--- lib/Basic/Targets.cpp
+++ lib/Basic/Targets.cpp
@@ -2002,16 +2002,6 @@
   return llvm::makeArrayRef(GCCRegNames);
 }
 
-static const unsigned AMDGPUAddrSpaceMap[] = {
-  1,// opencl_global
-  3,// opencl_local
-  2,// opencl_constant
-  4,// opencl_generic
-  1,// cuda_device
-  2,// cuda_constant
-  3 // cuda_shared
-};
-
 // If you edit the description strings, make sure you update
 // getPointerWidthV().
 
@@ -2019,15 +2009,65 @@
   "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
   "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
 
-static const char *const DataLayoutStringSI =
+static const char *const DataLayoutStringSIOld =
   "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32"
   "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
   "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
 
+static const char *const DataLayoutStringSINew =
+  "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32"
+  "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
+  "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
+
 class AMDGPUTargetInfo final : public TargetInfo {
   static const Builtin::Info BuiltinInfo[];
   static const char * const GCCRegNames[];
 
+  struct AddrSpace {
+unsigned Generic, Global, Local, Constant, Private;
+bool UseNew;
+AddrSpace(bool UseNew_ = false){
+  reset(UseNew_);
+}
+void reset(bool UseNew_) {
+  UseNew = UseNew_;
+  if (UseNew) {
+Generic   = 0;
+Global= 1;
+Local = 3;
+Constant  = 4;
+Private   = 5;
+  } else {
+Generic   = 4;
+Global= 1;
+Local = 3;
+Constant  = 2;
+Private   = 0;
+  }
+}
+const LangAS::Map *getMap() {
+  static LangAS::Map OldMap = {
+  1,  // opencl_global
+  3,  // opencl_local
+  2,  // opencl_constant
+  4,  // opencl_generic
+  1,  // cuda_device
+  2,  // cuda_constant
+  3   // cuda_shared
+  };
+  static LangAS::Map NewMap = {
+  1,  // opencl_global
+  3,  // opencl_local
+  4,  // opencl_constant
+  0,  // opencl_generic
+  1,  // cuda_device
+  4,  // cuda_constant
+  3   // cuda_shared
+  };
+  return UseNew ?  : 
+}
+  };
+
   /// \brief The GPU profiles supported by the AMDGPU target.
   enum GPUKind {
 GK_NONE,
@@ -2061,17 +2101,22 @@
   hasFP64(false),
   hasFMAF(false),
   hasLDEXPF(false),
-  hasFullSpeedFP32Denorms(false){
+  hasFullSpeedFP32Denorms(false) {
 if (getTriple().getArch() == llvm::Triple::amdgcn) {
   hasFP64 = true;
   hasFMAF = true;
   hasLDEXPF = true;
 }
+auto UseNewAddrMap = Triple.getEnvironment() == llvm::Triple::AMDNAS ||
+Triple.getEnvironment() == llvm::Triple::AMDNASCL;
+AS.reset(UseNewAddrMap);
 
 resetDataLayout(getTriple().getArch() == llvm::Triple::amdgcn ?
-DataLayoutStringSI : DataLayoutStringR600);
+(UseNewAddrMap ? DataLayoutStringSINew :
+DataLayoutStringSIOld)
+: DataLayoutStringR600);
 
-AddrSpaceMap = 
+AddrSpaceMap = AS.getMap();
 UseAddrSpaceMapMangling = true;
   }
 
@@ -2079,14 +2124,10 @@
 if (GPU <= GK_CAYMAN)
   return 32;
 
-switch(AddrSpace) {
-  default:
-return 64;
-  case 0:
-  case 3:
-  case 5:
-return 32;
+if (AddrSpace == AS.Private || AddrSpace == AS.Local) {
+  return 32;
 }
+return 64;
   }
 
   uint64_t getPreferredPointerWidth(unsigned AddrSpace) const override {
@@ -2283,12 +2324,13 @@
   /// DWARF.
   Optional 

[PATCH] D31210: [AMDGPU] Add new address space mapping

2017-03-21 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl updated this revision to Diff 92587.
yaxunl added a comment.

Fix getDWARFAddressSpace.


https://reviews.llvm.org/D31210

Files:
  include/clang/Driver/Options.td
  lib/Basic/Targets.cpp
  test/CodeGenOpenCL/amdgpu-new-addr.cl

Index: test/CodeGenOpenCL/amdgpu-new-addr.cl
===
--- /dev/null
+++ test/CodeGenOpenCL/amdgpu-new-addr.cl
@@ -0,0 +1,7 @@
+// RUN: %clang_cc1 %s -O0 -triple amdgcn -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -O0 -triple amdgcn -target-feature "+new-addr" -emit-llvm -o - | FileCheck -check-prefix=NEW %s
+
+// CHECK: target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+// NEW: target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+void foo(void) {}
+
Index: lib/Basic/Targets.cpp
===
--- lib/Basic/Targets.cpp
+++ lib/Basic/Targets.cpp
@@ -1997,16 +1997,6 @@
   return llvm::makeArrayRef(GCCRegNames);
 }
 
-static const unsigned AMDGPUAddrSpaceMap[] = {
-  1,// opencl_global
-  3,// opencl_local
-  2,// opencl_constant
-  4,// opencl_generic
-  1,// cuda_device
-  2,// cuda_constant
-  3 // cuda_shared
-};
-
 // If you edit the description strings, make sure you update
 // getPointerWidthV().
 
@@ -2014,15 +2004,65 @@
   "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
   "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
 
-static const char *const DataLayoutStringSI =
+static const char *const DataLayoutStringSIOld =
   "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32"
   "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
   "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
 
+static const char *const DataLayoutStringSINew =
+  "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32"
+  "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
+  "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
+
 class AMDGPUTargetInfo final : public TargetInfo {
   static const Builtin::Info BuiltinInfo[];
   static const char * const GCCRegNames[];
 
+  struct AddrSpace {
+unsigned Generic, Global, Local, Constant, Private;
+bool UseNew;
+AddrSpace(bool UseNew_ = false){
+  reset(UseNew_);
+}
+void reset(bool UseNew_) {
+  UseNew = UseNew_;
+  if (UseNew) {
+Generic   = 0;
+Global= 1;
+Local = 3;
+Constant  = 4;
+Private   = 5;
+  } else {
+Generic   = 4;
+Global= 1;
+Local = 3;
+Constant  = 2;
+Private   = 0;
+  }
+}
+const LangAS::Map *getMap() {
+  static LangAS::Map OldMap = {
+  1,  // opencl_global
+  3,  // opencl_local
+  2,  // opencl_constant
+  4,  // opencl_generic
+  1,  // cuda_device
+  2,  // cuda_constant
+  3   // cuda_shared
+  };
+  static LangAS::Map NewMap = {
+  1,  // opencl_global
+  3,  // opencl_local
+  4,  // opencl_constant
+  0,  // opencl_generic
+  1,  // cuda_device
+  4,  // cuda_constant
+  3   // cuda_shared
+  };
+  return UseNew ?  : 
+}
+  };
+
   /// \brief The GPU profiles supported by the AMDGPU target.
   enum GPUKind {
 GK_NONE,
@@ -2044,6 +2084,7 @@
   bool hasFMAF:1;
   bool hasLDEXPF:1;
   bool hasFullSpeedFP32Denorms:1;
+  bool UseNewAddrMap:1;
 
   static bool isAMDGCN(const llvm::Triple ) {
 return TT.getArch() == llvm::Triple::amdgcn;
@@ -2056,17 +2097,29 @@
   hasFP64(false),
   hasFMAF(false),
   hasLDEXPF(false),
-  hasFullSpeedFP32Denorms(false){
+  hasFullSpeedFP32Denorms(false),
+  UseNewAddrMap(false){
 if (getTriple().getArch() == llvm::Triple::amdgcn) {
   hasFP64 = true;
   hasFMAF = true;
   hasLDEXPF = true;
 }
+// At this stage, Opt.Features has not been populated yet.
+for (auto :Opts.FeaturesAsWritten) {
+  if (I == "+new-addr") {
+UseNewAddrMap = true;
+  } else if (I == "-new-addr") {
+UseNewAddrMap = false;
+  }
+}
+AS.reset(UseNewAddrMap);
 
 resetDataLayout(getTriple().getArch() == llvm::Triple::amdgcn ?
-DataLayoutStringSI : DataLayoutStringR600);
+(UseNewAddrMap ? DataLayoutStringSINew :
+DataLayoutStringSIOld)
+: DataLayoutStringR600);
 
-AddrSpaceMap = 
+AddrSpaceMap = AS.getMap();
 UseAddrSpaceMapMangling = true;
   }
 
@@ -2074,14 +2127,10 @@
 if (GPU <= GK_CAYMAN)
   return 32;
 
-switch(AddrSpace) {
-  default:
-return 64;
-  case 0:
-  case 3:
-  case 5:
-return 32;
+

[PATCH] D31210: [AMDGPU] Add new address space mapping

2017-03-21 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl added a comment.

In https://reviews.llvm.org/D31210#707014, @scchan wrote:

> In https://reviews.llvm.org/D31210#706955, @yaxunl wrote:
>
> > In https://reviews.llvm.org/D31210#706890, @kzhuravl wrote:
> >
> > > In https://reviews.llvm.org/D31210#706880, @rampitec wrote:
> > >
> > > > I'm concerned about the default address space to be 64 bit. It would 
> > > > move alloca into generic address space effectively making private 
> > > > address to be 64 bit.
> > > >  This may have very undesirable performance implications, like address 
> > > > arithmetic can become expensive 64 bit and only be truncated at load or 
> > > > store.
> > > >  I realize you will use addrspacecast on an alloca's value, though I'm 
> > > > not sure that is sufficient to mitigate performance hit.
> > > >  I believe such change shall not be made without a good performance 
> > > > comparison with the feature enabled, provided the very likely 
> > > > performance issues.
> > >
> > >
> > > Did not we want to use this: 
> > > http://lists.llvm.org/pipermail/llvm-dev/2017-March/99.html and use 
> > > non-0 for our allocas?
> >
> >
> > Our final goal is to let alloca return private pointer. The Clang changes 
> > are mostly common whether alloca returns generic pointer or private 
> > pointer. Actually to be able to test the above patch we need to get the 
> > changes in Clang done first.
>
>
> The goal for this is mainly to bring the mapping closer to nvptx?


Our goal is to be able to support both OpenCL and C++-based kernel languages 
without degrading OpenCL's performance. To achieve that goal alloca returning 
private pointer may be necessary.


https://reviews.llvm.org/D31210



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D31210: [AMDGPU] Add new address space mapping

2017-03-21 Thread Siu Chi Chan via Phabricator via cfe-commits
scchan added a comment.

In https://reviews.llvm.org/D31210#706955, @yaxunl wrote:

> In https://reviews.llvm.org/D31210#706890, @kzhuravl wrote:
>
> > In https://reviews.llvm.org/D31210#706880, @rampitec wrote:
> >
> > > I'm concerned about the default address space to be 64 bit. It would move 
> > > alloca into generic address space effectively making private address to 
> > > be 64 bit.
> > >  This may have very undesirable performance implications, like address 
> > > arithmetic can become expensive 64 bit and only be truncated at load or 
> > > store.
> > >  I realize you will use addrspacecast on an alloca's value, though I'm 
> > > not sure that is sufficient to mitigate performance hit.
> > >  I believe such change shall not be made without a good performance 
> > > comparison with the feature enabled, provided the very likely performance 
> > > issues.
> >
> >
> > Did not we want to use this: 
> > http://lists.llvm.org/pipermail/llvm-dev/2017-March/99.html and use 
> > non-0 for our allocas?
>
>
> Our final goal is to let alloca return private pointer. The Clang changes are 
> mostly common whether alloca returns generic pointer or private pointer. 
> Actually to be able to test the above patch we need to get the changes in 
> Clang done first.


The goal for this is mainly to bring the mapping closer to nvptx?


https://reviews.llvm.org/D31210



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D31210: [AMDGPU] Add new address space mapping

2017-03-21 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl added a comment.

In https://reviews.llvm.org/D31210#706890, @kzhuravl wrote:

> In https://reviews.llvm.org/D31210#706880, @rampitec wrote:
>
> > I'm concerned about the default address space to be 64 bit. It would move 
> > alloca into generic address space effectively making private address to be 
> > 64 bit.
> >  This may have very undesirable performance implications, like address 
> > arithmetic can become expensive 64 bit and only be truncated at load or 
> > store.
> >  I realize you will use addrspacecast on an alloca's value, though I'm not 
> > sure that is sufficient to mitigate performance hit.
> >  I believe such change shall not be made without a good performance 
> > comparison with the feature enabled, provided the very likely performance 
> > issues.
>
>
> Did not we want to use this: 
> http://lists.llvm.org/pipermail/llvm-dev/2017-March/99.html and use non-0 
> for our allocas?


Our final goal is to let alloca return private pointer. The Clang changes are 
mostly common whether alloca returns generic pointer or private pointer. 
Actually to be able to test the above patch we need to get the changes in Clang 
done first.


https://reviews.llvm.org/D31210



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D31210: [AMDGPU] Add new address space mapping

2017-03-21 Thread Konstantin Zhuravlyov via Phabricator via cfe-commits
kzhuravl added a comment.

In https://reviews.llvm.org/D31210#706880, @rampitec wrote:

> I'm concerned about the default address space to be 64 bit. It would move 
> alloca into generic address space effectively making private address to be 64 
> bit.
>  This may have very undesirable performance implications, like address 
> arithmetic can become expensive 64 bit and only be truncated at load or store.
>  I realize you will use addrspacecast on an alloca's value, though I'm not 
> sure that is sufficient to mitigate performance hit.
>  I believe such change shall not be made without a good performance 
> comparison with the feature enabled, provided the very likely performance 
> issues.


Did not we want to use this: 
http://lists.llvm.org/pipermail/llvm-dev/2017-March/99.html and use non-0 
for our allocas?


https://reviews.llvm.org/D31210



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D31210: [AMDGPU] Add new address space mapping

2017-03-21 Thread Stanislav Mekhanoshin via Phabricator via cfe-commits
rampitec added a comment.

I'm concerned about the default address space to be 64 bit. It would move 
alloca into generic address space effectively making private address to be 64 
bit.
This may have very undesirable performance implications, like address 
arithmetic can become expensive 64 bit and only be truncated at load or store.
I realize you will use addrspacecast on an alloca's value, though I'm not sure 
that is sufficient to mitigate performance hit.
I believe such change shall not be made without a good performance comparison 
with the feature enabled, provided the very likely performance issues.


https://reviews.llvm.org/D31210



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D31210: [AMDGPU] Add new address space mapping

2017-03-21 Thread Konstantin Zhuravlyov via Phabricator via cfe-commits
kzhuravl added a comment.

getDWARFAddressSpace also needs to be updated.


https://reviews.llvm.org/D31210



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D31210: [AMDGPU] Add new address space mapping

2017-03-21 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl created this revision.
Herald added subscribers: Anastasia, tpr, dstuttard, nhaehnle, wdng, kzhuravl.

A new target feature new-addr is added. When it is on, AMDGPU uses new address 
space mapping where generic address space is 0 and private address space is 5. 
The data layout is also changed correspondingly. By default this feature is off.

This is the beginning of an upstreaming effort for changing address space 
mapping of AMDGPU target. Sema/CodeGen changes to make OpenCL/C++ working with 
the new address space mapping will follow.


https://reviews.llvm.org/D31210

Files:
  include/clang/Driver/Options.td
  lib/Basic/Targets.cpp
  test/CodeGenOpenCL/amdgpu-new-addr.cl

Index: test/CodeGenOpenCL/amdgpu-new-addr.cl
===
--- /dev/null
+++ test/CodeGenOpenCL/amdgpu-new-addr.cl
@@ -0,0 +1,7 @@
+// RUN: %clang_cc1 %s -O0 -triple amdgcn -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -O0 -triple amdgcn -target-feature "+new-addr" -emit-llvm -o - | FileCheck -check-prefix=NEW %s
+
+// CHECK: target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+// NEW: target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+void foo(void) {}
+
Index: lib/Basic/Targets.cpp
===
--- lib/Basic/Targets.cpp
+++ lib/Basic/Targets.cpp
@@ -1997,16 +1997,6 @@
   return llvm::makeArrayRef(GCCRegNames);
 }
 
-static const unsigned AMDGPUAddrSpaceMap[] = {
-  1,// opencl_global
-  3,// opencl_local
-  2,// opencl_constant
-  4,// opencl_generic
-  1,// cuda_device
-  2,// cuda_constant
-  3 // cuda_shared
-};
-
 // If you edit the description strings, make sure you update
 // getPointerWidthV().
 
@@ -2014,15 +2004,65 @@
   "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
   "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
 
-static const char *const DataLayoutStringSI =
+static const char *const DataLayoutStringSIOld =
   "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32"
   "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
   "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
 
+static const char *const DataLayoutStringSINew =
+  "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32"
+  "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
+  "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
+
 class AMDGPUTargetInfo final : public TargetInfo {
   static const Builtin::Info BuiltinInfo[];
   static const char * const GCCRegNames[];
 
+  struct AddrSpace {
+unsigned Generic, Global, Local, Constant, Private;
+bool UseNew;
+AddrSpace(bool UseNew_ = false){
+  reset(UseNew_);
+}
+void reset(bool UseNew_) {
+  UseNew = UseNew_;
+  if (UseNew) {
+Generic   = 0;
+Global= 1;
+Local = 3;
+Constant  = 4;
+Private   = 5;
+  } else {
+Generic   = 4;
+Global= 1;
+Local = 3;
+Constant  = 2;
+Private   = 0;
+  }
+}
+const LangAS::Map *getMap() {
+  static LangAS::Map OldMap = {
+  1,  // opencl_global
+  3,  // opencl_local
+  2,  // opencl_constant
+  4,  // opencl_generic
+  1,  // cuda_device
+  2,  // cuda_constant
+  3   // cuda_shared
+  };
+  static LangAS::Map NewMap = {
+  1,  // opencl_global
+  3,  // opencl_local
+  4,  // opencl_constant
+  0,  // opencl_generic
+  1,  // cuda_device
+  4,  // cuda_constant
+  3   // cuda_shared
+  };
+  return UseNew ?  : 
+}
+  };
+
   /// \brief The GPU profiles supported by the AMDGPU target.
   enum GPUKind {
 GK_NONE,
@@ -2044,6 +2084,7 @@
   bool hasFMAF:1;
   bool hasLDEXPF:1;
   bool hasFullSpeedFP32Denorms:1;
+  bool UseNewAddrMap:1;
 
   static bool isAMDGCN(const llvm::Triple ) {
 return TT.getArch() == llvm::Triple::amdgcn;
@@ -2056,17 +2097,29 @@
   hasFP64(false),
   hasFMAF(false),
   hasLDEXPF(false),
-  hasFullSpeedFP32Denorms(false){
+  hasFullSpeedFP32Denorms(false),
+  UseNewAddrMap(false){
 if (getTriple().getArch() == llvm::Triple::amdgcn) {
   hasFP64 = true;
   hasFMAF = true;
   hasLDEXPF = true;
 }
+// At this stage, Opt.Features has not been populated yet.
+for (auto :Opts.FeaturesAsWritten) {
+  if (I == "+new-addr") {
+UseNewAddrMap = true;
+  } else if (I == "-new-addr") {
+UseNewAddrMap = false;
+  }
+}
+AS.reset(UseNewAddrMap);
 
 resetDataLayout(getTriple().getArch() == llvm::Triple::amdgcn ?
-DataLayoutStringSI : DataLayoutStringR600);
+