[PATCH] D31210: [AMDGPU] Add new address space mapping
yaxunl added a comment. Hi Tony, I have already updated with a full diff. Please take a look. Thanks. https://reviews.llvm.org/D31210 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D31210: [AMDGPU] Add new address space mapping
t-tye requested changes to this revision. t-tye added a comment. This revision now requires changes to proceed. Also please upload as full diff. Comment at: lib/Basic/Targets.cpp:2026-2069 + struct AddrSpace { +unsigned Generic, Global, Local, Constant, Private; +bool IsGenericZero; +AddrSpace(bool IsGenericZero_ = false){ + reset(IsGenericZero_); +} +void reset(bool IsGenericZero_) { Now that the target triple is being used to control the address space mapping, the mapping is only being set once. So suggest simplifying this and simply have two static arrays for the address spaces (like is done for the data layout), and then set AddrSpaceMap to the address of the appropriate array. Comment at: lib/Basic/Targets.cpp:2119 -AddrSpaceMap = +AddrSpaceMap = AS.getMap(); UseAddrSpaceMapMangling = true; See comment above. https://reviews.llvm.org/D31210 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D31210: [AMDGPU] Add new address space mapping
yaxunl updated this revision to Diff 92977. yaxunl added a comment. Revised by Tony's suggestions. https://reviews.llvm.org/D31210 Files: lib/Basic/Targets.cpp test/CodeGenOpenCL/amdgpu-env-amdgiz.cl Index: test/CodeGenOpenCL/amdgpu-env-amdgiz.cl === --- /dev/null +++ test/CodeGenOpenCL/amdgpu-env-amdgiz.cl @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 %s -O0 -triple amdgcn -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 %s -O0 -triple amdgcn---opencl -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 %s -O0 -triple amdgcn---amdgiz -emit-llvm -o - | FileCheck -check-prefix=NEW %s +// RUN: %clang_cc1 %s -O0 -triple amdgcn---amdgizcl -emit-llvm -o - | FileCheck -check-prefix=NEW %s + +// CHECK: target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" +// NEW: target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" +void foo(void) {} + Index: lib/Basic/Targets.cpp === --- lib/Basic/Targets.cpp +++ lib/Basic/Targets.cpp @@ -2002,14 +2002,23 @@ return llvm::makeArrayRef(GCCRegNames); } -static const unsigned AMDGPUAddrSpaceMap[] = { - 1,// opencl_global - 3,// opencl_local - 2,// opencl_constant - 4,// opencl_generic - 1,// cuda_device - 2,// cuda_constant - 3 // cuda_shared +static LangAS::Map AMDGPUPrivateIsZeroMap = { +1, // opencl_global +3, // opencl_local +2, // opencl_constant +4, // opencl_generic +1, // cuda_device +2, // cuda_constant +3 // cuda_shared +}; +static LangAS::Map AMDGPUGenericIsZeroMap = { +1, // opencl_global +3, // opencl_local +4, // opencl_constant +0, // opencl_generic +1, // cuda_device +4, // cuda_constant +3 // cuda_shared }; // If you edit the description strings, make sure you update @@ -2019,15 +2028,39 @@ "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"; -static const char *const DataLayoutStringSI = +static const char *const DataLayoutStringSIPrivateIsZero = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32" "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"; +static const char *const DataLayoutStringSIGenericIsZero = + "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32" + "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" + "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"; + class AMDGPUTargetInfo final : public TargetInfo { static const Builtin::Info BuiltinInfo[]; static const char * const GCCRegNames[]; + struct AddrSpace { +unsigned Generic, Global, Local, Constant, Private; +AddrSpace(bool IsGenericZero_ = false){ + if (IsGenericZero_) { +Generic = 0; +Global= 1; +Local = 3; +Constant = 4; +Private = 5; + } else { +Generic = 4; +Global= 1; +Local = 3; +Constant = 2; +Private = 0; + } +} + }; + /// \brief The GPU profiles supported by the AMDGPU target. enum GPUKind { GK_NONE, @@ -2054,39 +2087,43 @@ return TT.getArch() == llvm::Triple::amdgcn; } + static bool isGenericZero(const llvm::Triple ) { +return TT.getEnvironment() == llvm::Triple::AMDGIZ || +TT.getEnvironment() == llvm::Triple::AMDGIZCL; + } public: AMDGPUTargetInfo(const llvm::Triple , const TargetOptions ) : TargetInfo(Triple) , GPU(isAMDGCN(Triple) ? GK_GFX6 : GK_R600), hasFP64(false), hasFMAF(false), hasLDEXPF(false), - hasFullSpeedFP32Denorms(false){ + hasFullSpeedFP32Denorms(false), + AS(isGenericZero(Triple)){ if (getTriple().getArch() == llvm::Triple::amdgcn) { hasFP64 = true; hasFMAF = true; hasLDEXPF = true; } - +auto IsGenericZero = isGenericZero(Triple); resetDataLayout(getTriple().getArch() == llvm::Triple::amdgcn ? -DataLayoutStringSI : DataLayoutStringR600); +(IsGenericZero ? DataLayoutStringSIGenericIsZero : +DataLayoutStringSIPrivateIsZero) +: DataLayoutStringR600); -AddrSpaceMap = +AddrSpaceMap = IsGenericZero ? : + UseAddrSpaceMapMangling = true; } uint64_t getPointerWidthV(unsigned AddrSpace) const override { if (GPU <= GK_CAYMAN) return 32; -switch(AddrSpace) { - default: -return 64; - case 0: - case 3: - case 5: -return 32; +if (AddrSpace == AS.Private || AddrSpace == AS.Local) { + return 32; } +return 64;
[PATCH] D31210: [AMDGPU] Add new address space mapping
yaxunl added a comment. Ping! Any further issues? We need this in to move on with Clang codegen for the new address space mapping. Thanks. https://reviews.llvm.org/D31210 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D31210: [AMDGPU] Add new address space mapping
yaxunl updated this revision to Diff 92717. yaxunl edited the summary of this revision. yaxunl added a comment. Rename the triple and variable names. https://reviews.llvm.org/D31210 Files: lib/Basic/Targets.cpp test/CodeGenOpenCL/amdgpu-env-amdgiz.cl Index: test/CodeGenOpenCL/amdgpu-env-amdgiz.cl === --- /dev/null +++ test/CodeGenOpenCL/amdgpu-env-amdgiz.cl @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 %s -O0 -triple amdgcn -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 %s -O0 -triple amdgcn---opencl -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 %s -O0 -triple amdgcn---amdgiz -emit-llvm -o - | FileCheck -check-prefix=NEW %s +// RUN: %clang_cc1 %s -O0 -triple amdgcn---amdgizcl -emit-llvm -o - | FileCheck -check-prefix=NEW %s + +// CHECK: target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" +// NEW: target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" +void foo(void) {} + Index: lib/Basic/Targets.cpp === --- lib/Basic/Targets.cpp +++ lib/Basic/Targets.cpp @@ -2002,16 +2002,6 @@ return llvm::makeArrayRef(GCCRegNames); } -static const unsigned AMDGPUAddrSpaceMap[] = { - 1,// opencl_global - 3,// opencl_local - 2,// opencl_constant - 4,// opencl_generic - 1,// cuda_device - 2,// cuda_constant - 3 // cuda_shared -}; - // If you edit the description strings, make sure you update // getPointerWidthV(). @@ -2019,15 +2009,65 @@ "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"; -static const char *const DataLayoutStringSI = +static const char *const DataLayoutStringSIPrivateIsZero = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32" "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"; +static const char *const DataLayoutStringSIGenericIsZero = + "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32" + "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" + "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"; + class AMDGPUTargetInfo final : public TargetInfo { static const Builtin::Info BuiltinInfo[]; static const char * const GCCRegNames[]; + struct AddrSpace { +unsigned Generic, Global, Local, Constant, Private; +bool IsGenericZero; +AddrSpace(bool IsGenericZero_ = false){ + reset(IsGenericZero_); +} +void reset(bool IsGenericZero_) { + IsGenericZero = IsGenericZero_; + if (IsGenericZero) { +Generic = 0; +Global= 1; +Local = 3; +Constant = 4; +Private = 5; + } else { +Generic = 4; +Global= 1; +Local = 3; +Constant = 2; +Private = 0; + } +} +const LangAS::Map *getMap() { + static LangAS::Map PrivateIsZeroMap = { + 1, // opencl_global + 3, // opencl_local + 2, // opencl_constant + 4, // opencl_generic + 1, // cuda_device + 2, // cuda_constant + 3 // cuda_shared + }; + static LangAS::Map GenericIsZeroMap = { + 1, // opencl_global + 3, // opencl_local + 4, // opencl_constant + 0, // opencl_generic + 1, // cuda_device + 4, // cuda_constant + 3 // cuda_shared + }; + return IsGenericZero ? : +} + }; + /// \brief The GPU profiles supported by the AMDGPU target. enum GPUKind { GK_NONE, @@ -2061,17 +2101,22 @@ hasFP64(false), hasFMAF(false), hasLDEXPF(false), - hasFullSpeedFP32Denorms(false){ + hasFullSpeedFP32Denorms(false) { if (getTriple().getArch() == llvm::Triple::amdgcn) { hasFP64 = true; hasFMAF = true; hasLDEXPF = true; } +auto IsGenericZero = Triple.getEnvironment() == llvm::Triple::AMDGIZ || +Triple.getEnvironment() == llvm::Triple::AMDGIZCL; +AS.reset(IsGenericZero); resetDataLayout(getTriple().getArch() == llvm::Triple::amdgcn ? -DataLayoutStringSI : DataLayoutStringR600); +(IsGenericZero ? DataLayoutStringSIGenericIsZero : +DataLayoutStringSIPrivateIsZero) +: DataLayoutStringR600); -AddrSpaceMap = +AddrSpaceMap = AS.getMap(); UseAddrSpaceMapMangling = true; } @@ -2079,14 +2124,10 @@ if (GPU <= GK_CAYMAN) return 32; -switch(AddrSpace) { - default: -return 64; - case 0: - case 3: - case 5: -return 32; +if (AddrSpace == AS.Private || AddrSpace == AS.Local) { +
[PATCH] D31210: [AMDGPU] Add new address space mapping
yaxunl added a comment. In https://reviews.llvm.org/D31210#707842, @yaxunl wrote: > In https://reviews.llvm.org/D31210#707832, @rampitec wrote: > > > I also do not exactly like names "old" and "new". This implies we are going > > to switch to "new" permanently and doing transition. That is not clear yet, > > however. > > > How about we call the old address space mapping "Alloca Is Zero" (AIZ) > address space mapping, whereas the new address space mapping "Generic Is > Zero" (GIZ) address space mapping? > > I can change the variable names. Would you suggest to change the target > triple environment name too? e.g. amdnas => amdgiz, amdnascl => amdgizcl? I understand your indication. If we found the maintenance In https://reviews.llvm.org/D31210#707855, @rampitec wrote: > In https://reviews.llvm.org/D31210#707842, @yaxunl wrote: > > > In https://reviews.llvm.org/D31210#707832, @rampitec wrote: > > > > > I also do not exactly like names "old" and "new". This implies we are > > > going to switch to "new" permanently and doing transition. That is not > > > clear yet, however. > > > > > > How about we call the old address space mapping "Alloca Is Zero" (AIZ) > > address space mapping, whereas the new address space mapping "Generic Is > > Zero" (GIZ) address space mapping? > > > > I can change the variable names. Would you suggest to change the target > > triple environment name too? e.g. amdnas => amdgiz, amdnascl => amdgizcl? > > > I think "private is zero" is better than "alloca is zero". The latter may > change. I also think it make sense for these to match evn name. What "nas" > suffix stands for by the way? nas means "New Address Space". https://reviews.llvm.org/D31210 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D31210: [AMDGPU] Add new address space mapping
rampitec added a comment. In https://reviews.llvm.org/D31210#707842, @yaxunl wrote: > In https://reviews.llvm.org/D31210#707832, @rampitec wrote: > > > I also do not exactly like names "old" and "new". This implies we are going > > to switch to "new" permanently and doing transition. That is not clear yet, > > however. > > > How about we call the old address space mapping "Alloca Is Zero" (AIZ) > address space mapping, whereas the new address space mapping "Generic Is > Zero" (GIZ) address space mapping? > > I can change the variable names. Would you suggest to change the target > triple environment name too? e.g. amdnas => amdgiz, amdnascl => amdgizcl? I think "private is zero" is better than "alloca is zero". The latter may change. I also think it make sense for these to match evn name. What "nas" suffix stands for by the way? https://reviews.llvm.org/D31210 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D31210: [AMDGPU] Add new address space mapping
yaxunl added a comment. In https://reviews.llvm.org/D31210#707832, @rampitec wrote: > I also do not exactly like names "old" and "new". This implies we are going > to switch to "new" permanently and doing transition. That is not clear yet, > however. How about we call the old address space mapping "Alloca Is Zero" (AIZ) address space mapping, whereas the new address space mapping "Generic Is Zero" (GIZ) address space mapping? I can change the variable names. Would you suggest to change the target triple environment name too? e.g. amdnas => amdgiz, amdnascl => amdgizcl? https://reviews.llvm.org/D31210 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D31210: [AMDGPU] Add new address space mapping
yaxunl added inline comments. Comment at: lib/Basic/Targets.cpp:2029-2040 + if (UseNew) { +Generic = 0; +Global= 1; +Local = 3; +Constant = 4; +Private = 5; + } else { tstellar wrote: > What are these values used for? The new address space mapping will be used for all kernel languages, especially for C++-based kernel languages like HCC/OpenMP/CUDA. We found the old address space mapping is unable to support C++ based kernel languages, that's the motivation of this change. Eventually we will switch to the new address space mapping. We temporarily keep two address space mapping during the transition, Comment at: lib/Basic/Targets.cpp:2057 + 4, // opencl_constant + 0, // opencl_generic + 1, // cuda_device tstellar wrote: > How will the backend deal with the fact that allocas return generic address > space pointers? Matt will upstream a patch which let alloca return a pointer to private address space. https://reviews.llvm.org/D31210 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D31210: [AMDGPU] Add new address space mapping
rampitec added a comment. I also do not exactly like names "old" and "new". This implies we are going to switch to "new" permanently and doing transition. That is not clear yet, however. https://reviews.llvm.org/D31210 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D31210: [AMDGPU] Add new address space mapping
tstellar added inline comments. Comment at: lib/Basic/Targets.cpp:2029-2040 + if (UseNew) { +Generic = 0; +Global= 1; +Local = 3; +Constant = 4; +Private = 5; + } else { What are these values used for? Comment at: lib/Basic/Targets.cpp:2057 + 4, // opencl_constant + 0, // opencl_generic + 1, // cuda_device How will the backend deal with the fact that allocas return generic address space pointers? https://reviews.llvm.org/D31210 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D31210: [AMDGPU] Add new address space mapping
yaxunl updated this revision to Diff 92674. yaxunl edited the summary of this revision. https://reviews.llvm.org/D31210 Files: lib/Basic/Targets.cpp test/CodeGenOpenCL/amdgpu-env-amdnas.cl Index: test/CodeGenOpenCL/amdgpu-env-amdnas.cl === --- /dev/null +++ test/CodeGenOpenCL/amdgpu-env-amdnas.cl @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 %s -O0 -triple amdgcn -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 %s -O0 -triple amdgcn---opencl -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 %s -O0 -triple amdgcn---amdnas -emit-llvm -o - | FileCheck -check-prefix=NEW %s +// RUN: %clang_cc1 %s -O0 -triple amdgcn---amdnascl -emit-llvm -o - | FileCheck -check-prefix=NEW %s + +// CHECK: target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" +// NEW: target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" +void foo(void) {} + Index: lib/Basic/Targets.cpp === --- lib/Basic/Targets.cpp +++ lib/Basic/Targets.cpp @@ -2002,16 +2002,6 @@ return llvm::makeArrayRef(GCCRegNames); } -static const unsigned AMDGPUAddrSpaceMap[] = { - 1,// opencl_global - 3,// opencl_local - 2,// opencl_constant - 4,// opencl_generic - 1,// cuda_device - 2,// cuda_constant - 3 // cuda_shared -}; - // If you edit the description strings, make sure you update // getPointerWidthV(). @@ -2019,15 +2009,65 @@ "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"; -static const char *const DataLayoutStringSI = +static const char *const DataLayoutStringSIOld = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32" "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"; +static const char *const DataLayoutStringSINew = + "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32" + "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" + "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"; + class AMDGPUTargetInfo final : public TargetInfo { static const Builtin::Info BuiltinInfo[]; static const char * const GCCRegNames[]; + struct AddrSpace { +unsigned Generic, Global, Local, Constant, Private; +bool UseNew; +AddrSpace(bool UseNew_ = false){ + reset(UseNew_); +} +void reset(bool UseNew_) { + UseNew = UseNew_; + if (UseNew) { +Generic = 0; +Global= 1; +Local = 3; +Constant = 4; +Private = 5; + } else { +Generic = 4; +Global= 1; +Local = 3; +Constant = 2; +Private = 0; + } +} +const LangAS::Map *getMap() { + static LangAS::Map OldMap = { + 1, // opencl_global + 3, // opencl_local + 2, // opencl_constant + 4, // opencl_generic + 1, // cuda_device + 2, // cuda_constant + 3 // cuda_shared + }; + static LangAS::Map NewMap = { + 1, // opencl_global + 3, // opencl_local + 4, // opencl_constant + 0, // opencl_generic + 1, // cuda_device + 4, // cuda_constant + 3 // cuda_shared + }; + return UseNew ? : +} + }; + /// \brief The GPU profiles supported by the AMDGPU target. enum GPUKind { GK_NONE, @@ -2061,17 +2101,22 @@ hasFP64(false), hasFMAF(false), hasLDEXPF(false), - hasFullSpeedFP32Denorms(false){ + hasFullSpeedFP32Denorms(false) { if (getTriple().getArch() == llvm::Triple::amdgcn) { hasFP64 = true; hasFMAF = true; hasLDEXPF = true; } +auto UseNewAddrMap = Triple.getEnvironment() == llvm::Triple::AMDNAS || +Triple.getEnvironment() == llvm::Triple::AMDNASCL; +AS.reset(UseNewAddrMap); resetDataLayout(getTriple().getArch() == llvm::Triple::amdgcn ? -DataLayoutStringSI : DataLayoutStringR600); +(UseNewAddrMap ? DataLayoutStringSINew : +DataLayoutStringSIOld) +: DataLayoutStringR600); -AddrSpaceMap = +AddrSpaceMap = AS.getMap(); UseAddrSpaceMapMangling = true; } @@ -2079,14 +2124,10 @@ if (GPU <= GK_CAYMAN) return 32; -switch(AddrSpace) { - default: -return 64; - case 0: - case 3: - case 5: -return 32; +if (AddrSpace == AS.Private || AddrSpace == AS.Local) { + return 32; } +return 64; } uint64_t getPreferredPointerWidth(unsigned AddrSpace) const override { @@ -2283,12 +2324,13 @@ /// DWARF. Optional
[PATCH] D31210: [AMDGPU] Add new address space mapping
yaxunl updated this revision to Diff 92587. yaxunl added a comment. Fix getDWARFAddressSpace. https://reviews.llvm.org/D31210 Files: include/clang/Driver/Options.td lib/Basic/Targets.cpp test/CodeGenOpenCL/amdgpu-new-addr.cl Index: test/CodeGenOpenCL/amdgpu-new-addr.cl === --- /dev/null +++ test/CodeGenOpenCL/amdgpu-new-addr.cl @@ -0,0 +1,7 @@ +// RUN: %clang_cc1 %s -O0 -triple amdgcn -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 %s -O0 -triple amdgcn -target-feature "+new-addr" -emit-llvm -o - | FileCheck -check-prefix=NEW %s + +// CHECK: target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" +// NEW: target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" +void foo(void) {} + Index: lib/Basic/Targets.cpp === --- lib/Basic/Targets.cpp +++ lib/Basic/Targets.cpp @@ -1997,16 +1997,6 @@ return llvm::makeArrayRef(GCCRegNames); } -static const unsigned AMDGPUAddrSpaceMap[] = { - 1,// opencl_global - 3,// opencl_local - 2,// opencl_constant - 4,// opencl_generic - 1,// cuda_device - 2,// cuda_constant - 3 // cuda_shared -}; - // If you edit the description strings, make sure you update // getPointerWidthV(). @@ -2014,15 +2004,65 @@ "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"; -static const char *const DataLayoutStringSI = +static const char *const DataLayoutStringSIOld = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32" "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"; +static const char *const DataLayoutStringSINew = + "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32" + "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" + "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"; + class AMDGPUTargetInfo final : public TargetInfo { static const Builtin::Info BuiltinInfo[]; static const char * const GCCRegNames[]; + struct AddrSpace { +unsigned Generic, Global, Local, Constant, Private; +bool UseNew; +AddrSpace(bool UseNew_ = false){ + reset(UseNew_); +} +void reset(bool UseNew_) { + UseNew = UseNew_; + if (UseNew) { +Generic = 0; +Global= 1; +Local = 3; +Constant = 4; +Private = 5; + } else { +Generic = 4; +Global= 1; +Local = 3; +Constant = 2; +Private = 0; + } +} +const LangAS::Map *getMap() { + static LangAS::Map OldMap = { + 1, // opencl_global + 3, // opencl_local + 2, // opencl_constant + 4, // opencl_generic + 1, // cuda_device + 2, // cuda_constant + 3 // cuda_shared + }; + static LangAS::Map NewMap = { + 1, // opencl_global + 3, // opencl_local + 4, // opencl_constant + 0, // opencl_generic + 1, // cuda_device + 4, // cuda_constant + 3 // cuda_shared + }; + return UseNew ? : +} + }; + /// \brief The GPU profiles supported by the AMDGPU target. enum GPUKind { GK_NONE, @@ -2044,6 +2084,7 @@ bool hasFMAF:1; bool hasLDEXPF:1; bool hasFullSpeedFP32Denorms:1; + bool UseNewAddrMap:1; static bool isAMDGCN(const llvm::Triple ) { return TT.getArch() == llvm::Triple::amdgcn; @@ -2056,17 +2097,29 @@ hasFP64(false), hasFMAF(false), hasLDEXPF(false), - hasFullSpeedFP32Denorms(false){ + hasFullSpeedFP32Denorms(false), + UseNewAddrMap(false){ if (getTriple().getArch() == llvm::Triple::amdgcn) { hasFP64 = true; hasFMAF = true; hasLDEXPF = true; } +// At this stage, Opt.Features has not been populated yet. +for (auto :Opts.FeaturesAsWritten) { + if (I == "+new-addr") { +UseNewAddrMap = true; + } else if (I == "-new-addr") { +UseNewAddrMap = false; + } +} +AS.reset(UseNewAddrMap); resetDataLayout(getTriple().getArch() == llvm::Triple::amdgcn ? -DataLayoutStringSI : DataLayoutStringR600); +(UseNewAddrMap ? DataLayoutStringSINew : +DataLayoutStringSIOld) +: DataLayoutStringR600); -AddrSpaceMap = +AddrSpaceMap = AS.getMap(); UseAddrSpaceMapMangling = true; } @@ -2074,14 +2127,10 @@ if (GPU <= GK_CAYMAN) return 32; -switch(AddrSpace) { - default: -return 64; - case 0: - case 3: - case 5: -return 32; +
[PATCH] D31210: [AMDGPU] Add new address space mapping
yaxunl added a comment. In https://reviews.llvm.org/D31210#707014, @scchan wrote: > In https://reviews.llvm.org/D31210#706955, @yaxunl wrote: > > > In https://reviews.llvm.org/D31210#706890, @kzhuravl wrote: > > > > > In https://reviews.llvm.org/D31210#706880, @rampitec wrote: > > > > > > > I'm concerned about the default address space to be 64 bit. It would > > > > move alloca into generic address space effectively making private > > > > address to be 64 bit. > > > > This may have very undesirable performance implications, like address > > > > arithmetic can become expensive 64 bit and only be truncated at load or > > > > store. > > > > I realize you will use addrspacecast on an alloca's value, though I'm > > > > not sure that is sufficient to mitigate performance hit. > > > > I believe such change shall not be made without a good performance > > > > comparison with the feature enabled, provided the very likely > > > > performance issues. > > > > > > > > > Did not we want to use this: > > > http://lists.llvm.org/pipermail/llvm-dev/2017-March/99.html and use > > > non-0 for our allocas? > > > > > > Our final goal is to let alloca return private pointer. The Clang changes > > are mostly common whether alloca returns generic pointer or private > > pointer. Actually to be able to test the above patch we need to get the > > changes in Clang done first. > > > The goal for this is mainly to bring the mapping closer to nvptx? Our goal is to be able to support both OpenCL and C++-based kernel languages without degrading OpenCL's performance. To achieve that goal alloca returning private pointer may be necessary. https://reviews.llvm.org/D31210 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D31210: [AMDGPU] Add new address space mapping
scchan added a comment. In https://reviews.llvm.org/D31210#706955, @yaxunl wrote: > In https://reviews.llvm.org/D31210#706890, @kzhuravl wrote: > > > In https://reviews.llvm.org/D31210#706880, @rampitec wrote: > > > > > I'm concerned about the default address space to be 64 bit. It would move > > > alloca into generic address space effectively making private address to > > > be 64 bit. > > > This may have very undesirable performance implications, like address > > > arithmetic can become expensive 64 bit and only be truncated at load or > > > store. > > > I realize you will use addrspacecast on an alloca's value, though I'm > > > not sure that is sufficient to mitigate performance hit. > > > I believe such change shall not be made without a good performance > > > comparison with the feature enabled, provided the very likely performance > > > issues. > > > > > > Did not we want to use this: > > http://lists.llvm.org/pipermail/llvm-dev/2017-March/99.html and use > > non-0 for our allocas? > > > Our final goal is to let alloca return private pointer. The Clang changes are > mostly common whether alloca returns generic pointer or private pointer. > Actually to be able to test the above patch we need to get the changes in > Clang done first. The goal for this is mainly to bring the mapping closer to nvptx? https://reviews.llvm.org/D31210 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D31210: [AMDGPU] Add new address space mapping
yaxunl added a comment. In https://reviews.llvm.org/D31210#706890, @kzhuravl wrote: > In https://reviews.llvm.org/D31210#706880, @rampitec wrote: > > > I'm concerned about the default address space to be 64 bit. It would move > > alloca into generic address space effectively making private address to be > > 64 bit. > > This may have very undesirable performance implications, like address > > arithmetic can become expensive 64 bit and only be truncated at load or > > store. > > I realize you will use addrspacecast on an alloca's value, though I'm not > > sure that is sufficient to mitigate performance hit. > > I believe such change shall not be made without a good performance > > comparison with the feature enabled, provided the very likely performance > > issues. > > > Did not we want to use this: > http://lists.llvm.org/pipermail/llvm-dev/2017-March/99.html and use non-0 > for our allocas? Our final goal is to let alloca return private pointer. The Clang changes are mostly common whether alloca returns generic pointer or private pointer. Actually to be able to test the above patch we need to get the changes in Clang done first. https://reviews.llvm.org/D31210 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D31210: [AMDGPU] Add new address space mapping
kzhuravl added a comment. In https://reviews.llvm.org/D31210#706880, @rampitec wrote: > I'm concerned about the default address space to be 64 bit. It would move > alloca into generic address space effectively making private address to be 64 > bit. > This may have very undesirable performance implications, like address > arithmetic can become expensive 64 bit and only be truncated at load or store. > I realize you will use addrspacecast on an alloca's value, though I'm not > sure that is sufficient to mitigate performance hit. > I believe such change shall not be made without a good performance > comparison with the feature enabled, provided the very likely performance > issues. Did not we want to use this: http://lists.llvm.org/pipermail/llvm-dev/2017-March/99.html and use non-0 for our allocas? https://reviews.llvm.org/D31210 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D31210: [AMDGPU] Add new address space mapping
rampitec added a comment. I'm concerned about the default address space to be 64 bit. It would move alloca into generic address space effectively making private address to be 64 bit. This may have very undesirable performance implications, like address arithmetic can become expensive 64 bit and only be truncated at load or store. I realize you will use addrspacecast on an alloca's value, though I'm not sure that is sufficient to mitigate performance hit. I believe such change shall not be made without a good performance comparison with the feature enabled, provided the very likely performance issues. https://reviews.llvm.org/D31210 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D31210: [AMDGPU] Add new address space mapping
kzhuravl added a comment. getDWARFAddressSpace also needs to be updated. https://reviews.llvm.org/D31210 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D31210: [AMDGPU] Add new address space mapping
yaxunl created this revision. Herald added subscribers: Anastasia, tpr, dstuttard, nhaehnle, wdng, kzhuravl. A new target feature new-addr is added. When it is on, AMDGPU uses new address space mapping where generic address space is 0 and private address space is 5. The data layout is also changed correspondingly. By default this feature is off. This is the beginning of an upstreaming effort for changing address space mapping of AMDGPU target. Sema/CodeGen changes to make OpenCL/C++ working with the new address space mapping will follow. https://reviews.llvm.org/D31210 Files: include/clang/Driver/Options.td lib/Basic/Targets.cpp test/CodeGenOpenCL/amdgpu-new-addr.cl Index: test/CodeGenOpenCL/amdgpu-new-addr.cl === --- /dev/null +++ test/CodeGenOpenCL/amdgpu-new-addr.cl @@ -0,0 +1,7 @@ +// RUN: %clang_cc1 %s -O0 -triple amdgcn -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 %s -O0 -triple amdgcn -target-feature "+new-addr" -emit-llvm -o - | FileCheck -check-prefix=NEW %s + +// CHECK: target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" +// NEW: target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" +void foo(void) {} + Index: lib/Basic/Targets.cpp === --- lib/Basic/Targets.cpp +++ lib/Basic/Targets.cpp @@ -1997,16 +1997,6 @@ return llvm::makeArrayRef(GCCRegNames); } -static const unsigned AMDGPUAddrSpaceMap[] = { - 1,// opencl_global - 3,// opencl_local - 2,// opencl_constant - 4,// opencl_generic - 1,// cuda_device - 2,// cuda_constant - 3 // cuda_shared -}; - // If you edit the description strings, make sure you update // getPointerWidthV(). @@ -2014,15 +2004,65 @@ "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"; -static const char *const DataLayoutStringSI = +static const char *const DataLayoutStringSIOld = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32" "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"; +static const char *const DataLayoutStringSINew = + "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32" + "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" + "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"; + class AMDGPUTargetInfo final : public TargetInfo { static const Builtin::Info BuiltinInfo[]; static const char * const GCCRegNames[]; + struct AddrSpace { +unsigned Generic, Global, Local, Constant, Private; +bool UseNew; +AddrSpace(bool UseNew_ = false){ + reset(UseNew_); +} +void reset(bool UseNew_) { + UseNew = UseNew_; + if (UseNew) { +Generic = 0; +Global= 1; +Local = 3; +Constant = 4; +Private = 5; + } else { +Generic = 4; +Global= 1; +Local = 3; +Constant = 2; +Private = 0; + } +} +const LangAS::Map *getMap() { + static LangAS::Map OldMap = { + 1, // opencl_global + 3, // opencl_local + 2, // opencl_constant + 4, // opencl_generic + 1, // cuda_device + 2, // cuda_constant + 3 // cuda_shared + }; + static LangAS::Map NewMap = { + 1, // opencl_global + 3, // opencl_local + 4, // opencl_constant + 0, // opencl_generic + 1, // cuda_device + 4, // cuda_constant + 3 // cuda_shared + }; + return UseNew ? : +} + }; + /// \brief The GPU profiles supported by the AMDGPU target. enum GPUKind { GK_NONE, @@ -2044,6 +2084,7 @@ bool hasFMAF:1; bool hasLDEXPF:1; bool hasFullSpeedFP32Denorms:1; + bool UseNewAddrMap:1; static bool isAMDGCN(const llvm::Triple ) { return TT.getArch() == llvm::Triple::amdgcn; @@ -2056,17 +2097,29 @@ hasFP64(false), hasFMAF(false), hasLDEXPF(false), - hasFullSpeedFP32Denorms(false){ + hasFullSpeedFP32Denorms(false), + UseNewAddrMap(false){ if (getTriple().getArch() == llvm::Triple::amdgcn) { hasFP64 = true; hasFMAF = true; hasLDEXPF = true; } +// At this stage, Opt.Features has not been populated yet. +for (auto :Opts.FeaturesAsWritten) { + if (I == "+new-addr") { +UseNewAddrMap = true; + } else if (I == "-new-addr") { +UseNewAddrMap = false; + } +} +AS.reset(UseNewAddrMap); resetDataLayout(getTriple().getArch() == llvm::Triple::amdgcn ? -DataLayoutStringSI : DataLayoutStringR600); +