yaxunl created this revision.
yaxunl added a reviewer: rjmccall.
Herald added a subscriber: tpr.

HIP is a language similar to CUDA
(https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_kernel_language.md).
The language syntax is very similar, which allows a HIP program to be compiled
as a CUDA program by Clang. The main difference is the host API: HIP defines a
set of vendor-neutral host APIs that can be implemented on different platforms.
Currently there is an open-source implementation of the HIP runtime for the
amdgpu target (https://github.com/ROCm-Developer-Tools/HIP).

This patch adds support for the hip file type and the HIP language option.

When a HIP file is compiled, both LangOpts.CUDA and LangOpts.HIP are turned on.
This allows a HIP program to be compiled as CUDA in most cases; LangOpts.HIP is
checked only where HIP-specific handling is needed.
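
For illustration, a minimal sketch of the checking pattern this enables is
below. The selectConfigureCallName helper is hypothetical and only shows how
LangOpts.HIP is consulted; the flag and the runtime-call names are the ones
introduced by this patch.

  // Hypothetical helper, not part of the patch: LangOpts.HIP gates the
  // HIP-specific choices while shared code paths keep checking LangOpts.CUDA.
  #include "clang/Basic/LangOptions.h"
  #include "llvm/ADT/StringRef.h"

  static llvm::StringRef selectConfigureCallName(const clang::LangOptions &Opts) {
    // Both Opts.CUDA and Opts.HIP are set for a HIP compilation; only the
    // spots that differ (runtime entry points, the __HIP__ macro, etc.)
    // need to check Opts.HIP.
    return Opts.HIP ? "hipConfigureCall" : "cudaConfigureCall";
  }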

This patch also adds support for launching kernels of a HIP program through the
HIP host API.
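
For example, with -x hip a triple-chevron launch such as the sketch below
(reusing the declarations from the test's Inputs/cuda.h) is lowered to calls to
hipConfigureCall, hipSetupArgument and hipLaunchByPtr instead of the cuda*
entry points, as checked in kernel-call.cu:

  __global__ void g1(int x) {}

  int main(void) {
    // Under -x hip this launch is lowered to hipConfigureCall /
    // hipSetupArgument / hipLaunchByPtr; otherwise to the cuda* equivalents.
    g1<<<1, 1>>>(42);
    return 0;
  }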

When -x hip is not specified, there is no behaviour change for CUDA.

Patch by Greg Rodgers.
Lit test added by Yaxun Liu.


https://reviews.llvm.org/D44984

Files:
  include/clang/Basic/LangOptions.def
  lib/CodeGen/CGCUDANV.cpp
  lib/Frontend/CompilerInvocation.cpp
  lib/Frontend/InitPreprocessor.cpp
  lib/Sema/SemaCUDA.cpp
  lib/Sema/SemaDecl.cpp
  test/CodeGenCUDA/Inputs/cuda.h
  test/CodeGenCUDA/device-stub.cu
  test/CodeGenCUDA/kernel-call.cu

Index: test/CodeGenCUDA/kernel-call.cu
===================================================================
--- test/CodeGenCUDA/kernel-call.cu
+++ test/CodeGenCUDA/kernel-call.cu
@@ -1,11 +1,20 @@
-// RUN: %clang_cc1 -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm %s -o - | FileCheck %s --check-prefixes=CUDA,CHECK
+// RUN: %clang_cc1 -x hip -emit-llvm %s -o - | FileCheck %s --check-prefixes=HIP,CHECK
+
 
 #include "Inputs/cuda.h"
 
+// CHECK-LABEL: define void @_Z2g1i(i32 %x)
+// HIP: call{{.*}}hipSetupArgument
+// HIP: call{{.*}}hipLaunchByPtr
+// CUDA: call{{.*}}cudaSetupArgument
+// CUDA: call{{.*}}cudaLaunch
 __global__ void g1(int x) {}
 
+// CHECK-LABEL: define i32 @main
 int main(void) {
-  // CHECK: call{{.*}}cudaConfigureCall
+  // HIP: call{{.*}}hipConfigureCall
+  // CUDA: call{{.*}}cudaConfigureCall
   // CHECK: icmp
   // CHECK: br
   // CHECK: call{{.*}}g1
Index: test/CodeGenCUDA/device-stub.cu
===================================================================
--- test/CodeGenCUDA/device-stub.cu
+++ test/CodeGenCUDA/device-stub.cu
@@ -1,8 +1,12 @@
 // RUN: echo "GPU binary would be here" > %t
-// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -fcuda-include-gpubinary %t -o - | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -fcuda-include-gpubinary %t -o - | FileCheck -check-prefixes=CHECK,CUDA %s
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -fcuda-include-gpubinary %t -o -  -DNOGLOBALS \
 // RUN:   | FileCheck %s -check-prefix=NOGLOBALS
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s -check-prefix=NOGPUBIN
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -x hip -emit-llvm %s -fcuda-include-gpubinary %t -o - | FileCheck -check-prefixes=CHECK,HIP %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -x hip -emit-llvm %s -fcuda-include-gpubinary %t -o -  -DNOGLOBALS \
+// RUN:   | FileCheck %s -check-prefix=NOGLOBALS
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -x hip -emit-llvm %s -o - | FileCheck %s -check-prefix=NOGPUBIN
 
 #include "Inputs/cuda.h"
 
@@ -48,67 +52,68 @@
 // CHECK: private unnamed_addr constant{{.*GPU binary would be here.*}}\00"
 // CHECK-SAME: section ".nv_fatbin", align 8
 // * constant struct that wraps GPU binary
-// CHECK: @__cuda_fatbin_wrapper = internal constant { i32, i32, i8*, i8* } 
+// CUDA: @__[[PREFIX:cuda]]_fatbin_wrapper = internal constant { i32, i32, i8*, i8* }
+// HIP: @__[[PREFIX:hip]]_fatbin_wrapper = internal constant { i32, i32, i8*, i8* }
 // CHECK-SAME: { i32 1180844977, i32 1, {{.*}}, i8* null }
 // CHECK-SAME: section ".nvFatBinSegment"
 // * variable to save GPU binary handle after initialization
-// CHECK: @__cuda_gpubin_handle = internal global i8** null
+// CHECK: @__[[PREFIX]]_gpubin_handle = internal global i8** null
 // * Make sure our constructor/destructor was added to global ctor/dtor list.
-// CHECK: @llvm.global_ctors = appending global {{.*}}@__cuda_module_ctor
-// CHECK: @llvm.global_dtors = appending global {{.*}}@__cuda_module_dtor
+// CHECK: @llvm.global_ctors = appending global {{.*}}@__[[PREFIX]]_module_ctor
+// CHECK: @llvm.global_dtors = appending global {{.*}}@__[[PREFIX]]_module_dtor
 
 // Test that we build the correct number of calls to cudaSetupArgument followed
 // by a call to cudaLaunch.
 
 // CHECK: define{{.*}}kernelfunc
-// CHECK: call{{.*}}cudaSetupArgument
-// CHECK: call{{.*}}cudaSetupArgument
-// CHECK: call{{.*}}cudaSetupArgument
-// CHECK: call{{.*}}cudaLaunch
+// CHECK: call{{.*}}[[PREFIX]]SetupArgument
+// CHECK: call{{.*}}[[PREFIX]]SetupArgument
+// CHECK: call{{.*}}[[PREFIX]]SetupArgument
+// CHECK: call{{.*}}[[PREFIX]]Launch
 __global__ void kernelfunc(int i, int j, int k) {}
 
 // Test that we've built correct kernel launch sequence.
 // CHECK: define{{.*}}hostfunc
-// CHECK: call{{.*}}cudaConfigureCall
+// CHECK: call{{.*}}[[PREFIX]]ConfigureCall
 // CHECK: call{{.*}}kernelfunc
 void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }
 #endif
 
 // Test that we've built a function to register kernels and global vars.
-// CHECK: define internal void @__cuda_register_globals
-// CHECK: call{{.*}}cudaRegisterFunction(i8** %0, {{.*}}kernelfunc
-// CHECK-DAG: call{{.*}}cudaRegisterVar(i8** %0, {{.*}}device_var{{.*}}i32 0, i32 4, i32 0, i32 0
-// CHECK-DAG: call{{.*}}cudaRegisterVar(i8** %0, {{.*}}constant_var{{.*}}i32 0, i32 4, i32 1, i32 0
-// CHECK-DAG: call{{.*}}cudaRegisterVar(i8** %0, {{.*}}ext_device_var{{.*}}i32 1, i32 4, i32 0, i32 0
-// CHECK-DAG: call{{.*}}cudaRegisterVar(i8** %0, {{.*}}ext_constant_var{{.*}}i32 1, i32 4, i32 1, i32 0
+// CHECK: define internal void @__[[PREFIX]]_register_globals
+// CHECK: call{{.*}}[[PREFIX]]RegisterFunction(i8** %0, {{.*}}kernelfunc
+// CHECK-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}device_var{{.*}}i32 0, i32 4, i32 0, i32 0
+// CHECK-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}constant_var{{.*}}i32 0, i32 4, i32 1, i32 0
+// CHECK-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_device_var{{.*}}i32 1, i32 4, i32 0, i32 0
+// CHECK-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_constant_var{{.*}}i32 1, i32 4, i32 1, i32 0
 // CHECK: ret void
 
 // Test that we've built contructor..
-// CHECK: define internal void @__cuda_module_ctor
-//   .. that calls __cudaRegisterFatBinary(&__cuda_fatbin_wrapper)
-// CHECK: call{{.*}}cudaRegisterFatBinary{{.*}}__cuda_fatbin_wrapper
-//   .. stores return value in __cuda_gpubin_handle
-// CHECK-NEXT: store{{.*}}__cuda_gpubin_handle
-//   .. and then calls __cuda_register_globals
-// CHECK-NEXT: call void @__cuda_register_globals
+// CHECK: define internal void @__[[PREFIX]]_module_ctor
+//   .. that calls __[[PREFIX]]RegisterFatBinary(&__[[PREFIX]]_fatbin_wrapper)
+// CHECK: call{{.*}}[[PREFIX]]RegisterFatBinary{{.*}}__[[PREFIX]]_fatbin_wrapper
+//   .. stores return value in __[[PREFIX]]_gpubin_handle
+// CHECK-NEXT: store{{.*}}__[[PREFIX]]_gpubin_handle
+//   .. and then calls __[[PREFIX]]_register_globals
+// CHECK-NEXT: call void @__[[PREFIX]]_register_globals
 
 // Test that we've created destructor.
-// CHECK: define internal void @__cuda_module_dtor
-// CHECK: load{{.*}}__cuda_gpubin_handle
-// CHECK-NEXT: call void @__cudaUnregisterFatBinary
+// CHECK: define internal void @__[[PREFIX]]_module_dtor
+// CHECK: load{{.*}}__[[PREFIX]]_gpubin_handle
+// CHECK-NEXT: call void @__[[PREFIX]]UnregisterFatBinary
 
-// There should be no __cuda_register_globals if we have no
+// There should be no __[[PREFIX]]_register_globals if we have no
 // device-side globals, but we still need to register GPU binary.
 // Skip GPU binary string first.
 // NOGLOBALS: @0 = private unnamed_addr constant{{.*}}
-// NOGLOBALS-NOT: define internal void @__cuda_register_globals
-// NOGLOBALS: define internal void @__cuda_module_ctor
-// NOGLOBALS: call{{.*}}cudaRegisterFatBinary{{.*}}__cuda_fatbin_wrapper
-// NOGLOBALS-NOT: call void @__cuda_register_globals
-// NOGLOBALS: define internal void @__cuda_module_dtor
-// NOGLOBALS: call void @__cudaUnregisterFatBinary
+// NOGLOBALS-NOT: define internal void @__{{.*}}_register_globals
+// NOGLOBALS: define internal void @__[[PREFIX:.*]]_module_ctor
+// NOGLOBALS: call{{.*}}[[PREFIX]]RegisterFatBinary{{.*}}__[[PREFIX]]_fatbin_wrapper
+// NOGLOBALS-NOT: call void @__[[PREFIX]]_register_globals
+// NOGLOBALS: define internal void @__[[PREFIX]]_module_dtor
+// NOGLOBALS: call void @__[[PREFIX]]UnregisterFatBinary
 
 // There should be no constructors/destructors if we have no GPU binary.
-// NOGPUBIN-NOT: define internal void @__cuda_register_globals
-// NOGPUBIN-NOT: define internal void @__cuda_module_ctor
-// NOGPUBIN-NOT: define internal void @__cuda_module_dtor
+// NOGPUBIN-NOT: define internal void @__{{.*}}_register_globals
+// NOGPUBIN-NOT: define internal void @__{{.*}}_module_ctor
+// NOGPUBIN-NOT: define internal void @__{{.*}}_module_dtor
Index: test/CodeGenCUDA/Inputs/cuda.h
===================================================================
--- test/CodeGenCUDA/Inputs/cuda.h
+++ test/CodeGenCUDA/Inputs/cuda.h
@@ -16,7 +16,12 @@
 
 typedef struct cudaStream *cudaStream_t;
 
+#ifdef __HIP__
+int hipConfigureCall(dim3 gridSize, dim3 blockSize, size_t sharedSize = 0,
+                     cudaStream_t stream = 0);
+#else
 int cudaConfigureCall(dim3 gridSize, dim3 blockSize, size_t sharedSize = 0,
                       cudaStream_t stream = 0);
+#endif
 
 extern "C" __device__ int printf(const char*, ...);
Index: lib/Sema/SemaDecl.cpp
===================================================================
--- lib/Sema/SemaDecl.cpp
+++ lib/Sema/SemaDecl.cpp
@@ -9047,11 +9047,13 @@
 
   if (getLangOpts().CUDA) {
     IdentifierInfo *II = NewFD->getIdentifier();
-    if (II && II->isStr("cudaConfigureCall") && !NewFD->isInvalidDecl() &&
+    if (II &&
+        ((getLangOpts().HIP && II->isStr("hipConfigureCall")) ||
+         (!getLangOpts().HIP && II->isStr("cudaConfigureCall"))) &&
+        !NewFD->isInvalidDecl() &&
         NewFD->getDeclContext()->getRedeclContext()->isTranslationUnit()) {
       if (!R->getAs<FunctionType>()->getReturnType()->isScalarType())
         Diag(NewFD->getLocation(), diag::err_config_scalar_return);
-
       Context.setcudaConfigureCallDecl(NewFD);
     }
 
Index: lib/Sema/SemaCUDA.cpp
===================================================================
--- lib/Sema/SemaCUDA.cpp
+++ lib/Sema/SemaCUDA.cpp
@@ -42,8 +42,9 @@
                                          SourceLocation GGGLoc) {
   FunctionDecl *ConfigDecl = Context.getcudaConfigureCallDecl();
   if (!ConfigDecl)
-    return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use)
-                     << "cudaConfigureCall");
+    return ExprError(
+        Diag(LLLLoc, diag::err_undeclared_var_use)
+        << (getLangOpts().HIP ? "hipConfigureCall" : "cudaConfigureCall"));
   QualType ConfigQTy = ConfigDecl->getType();
 
   DeclRefExpr *ConfigDR = new (Context)
Index: lib/Frontend/InitPreprocessor.cpp
===================================================================
--- lib/Frontend/InitPreprocessor.cpp
+++ lib/Frontend/InitPreprocessor.cpp
@@ -465,6 +465,8 @@
     Builder.defineMacro("__ASSEMBLER__");
   if (LangOpts.CUDA)
     Builder.defineMacro("__CUDA__");
+  if (LangOpts.HIP)
+    Builder.defineMacro("__HIP__");
 }
 
 /// Initialize the predefined C++ language feature test macros defined in
Index: lib/Frontend/CompilerInvocation.cpp
===================================================================
--- lib/Frontend/CompilerInvocation.cpp
+++ lib/Frontend/CompilerInvocation.cpp
@@ -1591,6 +1591,7 @@
                 .Case("c", InputKind::C)
                 .Case("cl", InputKind::OpenCL)
                 .Case("cuda", InputKind::CUDA)
+                .Case("hip", InputKind::CUDA)
                 .Case("c++", InputKind::CXX)
                 .Case("objective-c", InputKind::ObjC)
                 .Case("objective-c++", InputKind::ObjCXX)
@@ -2102,6 +2103,11 @@
 
   Opts.IncludeDefaultHeader = Args.hasArg(OPT_finclude_default_header);
 
+  if (const Arg *A = Args.getLastArg(OPT_x)) {
+    if (!strcmp(A->getValue(), "hip"))
+      Opts.HIP = true;
+  }
+
   llvm::Triple T(TargetOpts.Triple);
   CompilerInvocation::setLangDefaults(Opts, IK, T, PPOpts, LangStd);
 
Index: lib/CodeGen/CGCUDANV.cpp
===================================================================
--- lib/CodeGen/CGCUDANV.cpp
+++ lib/CodeGen/CGCUDANV.cpp
@@ -48,6 +48,9 @@
 
   llvm::Constant *getSetupArgumentFn() const;
   llvm::Constant *getLaunchFn() const;
+  std::string addPrefixToName(CodeGenModule &CGM, std::string FuncName) const;
+  std::string addPrefixToNameBar(CodeGenModule &CGM,
+                                 std::string FuncName) const;
 
   /// Creates a function to register all kernel stubs generated in this module.
   llvm::Function *makeRegisterGlobalsFn();
@@ -91,6 +94,19 @@
 
 }
 
+std::string CGNVCUDARuntime::addPrefixToName(CodeGenModule &CGM,
+                                             std::string FuncName) const {
+  if (CGM.getLangOpts().HIP)
+    return ((Twine("hip") + Twine(FuncName)).str());
+  return ((Twine("cuda") + Twine(FuncName)).str());
+}
+std::string CGNVCUDARuntime::addPrefixToNameBar(CodeGenModule &CGM,
+                                                std::string FuncName) const {
+  if (CGM.getLangOpts().HIP)
+    return ((Twine("__hip") + Twine(FuncName)).str());
+  return ((Twine("__cuda") + Twine(FuncName)).str());
+}
+
 CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM)
     : CGCUDARuntime(CGM), Context(CGM.getLLVMContext()),
       TheModule(CGM.getModule()) {
@@ -109,15 +125,19 @@
 llvm::Constant *CGNVCUDARuntime::getSetupArgumentFn() const {
   // cudaError_t cudaSetupArgument(void *, size_t, size_t)
   llvm::Type *Params[] = {VoidPtrTy, SizeTy, SizeTy};
-  return CGM.CreateRuntimeFunction(llvm::FunctionType::get(IntTy,
-                                                           Params, false),
-                                   "cudaSetupArgument");
+  return CGM.CreateRuntimeFunction(
+      llvm::FunctionType::get(IntTy, Params, false),
+      addPrefixToName(CGM, "SetupArgument"));
 }
 
 llvm::Constant *CGNVCUDARuntime::getLaunchFn() const {
   // cudaError_t cudaLaunch(char *)
-  return CGM.CreateRuntimeFunction(
-      llvm::FunctionType::get(IntTy, CharPtrTy, false), "cudaLaunch");
+  if (CGM.getLangOpts().HIP)
+    return CGM.CreateRuntimeFunction(
+        llvm::FunctionType::get(IntTy, CharPtrTy, false), "hipLaunchByPtr");
+  else
+    return CGM.CreateRuntimeFunction(
+        llvm::FunctionType::get(IntTy, CharPtrTy, false), "cudaLaunch");
 }
 
 void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF,
@@ -182,7 +202,8 @@
 
   llvm::Function *RegisterKernelsFunc = llvm::Function::Create(
       llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false),
-      llvm::GlobalValue::InternalLinkage, "__cuda_register_globals", &TheModule);
+      llvm::GlobalValue::InternalLinkage,
+      addPrefixToNameBar(CGM, "_register_globals"), &TheModule);
   llvm::BasicBlock *EntryBB =
       llvm::BasicBlock::Create(Context, "entry", RegisterKernelsFunc);
   CGBuilderTy Builder(CGM, Context);
@@ -195,7 +216,7 @@
       VoidPtrTy,    VoidPtrTy, VoidPtrTy, VoidPtrTy, IntTy->getPointerTo()};
   llvm::Constant *RegisterFunc = CGM.CreateRuntimeFunction(
       llvm::FunctionType::get(IntTy, RegisterFuncParams, false),
-      "__cudaRegisterFunction");
+      addPrefixToNameBar(CGM, "RegisterFunction"));
 
   // Extract GpuBinaryHandle passed as the first argument passed to
   // __cuda_register_globals() and generate __cudaRegisterFunction() call for
@@ -219,7 +240,7 @@
                                      IntTy,        IntTy};
   llvm::Constant *RegisterVar = CGM.CreateRuntimeFunction(
       llvm::FunctionType::get(IntTy, RegisterVarParams, false),
-      "__cudaRegisterVar");
+      addPrefixToNameBar(CGM, "RegisterVar"));
   for (auto &Pair : DeviceVars) {
     llvm::GlobalVariable *Var = Pair.first;
     unsigned Flags = Pair.second;
@@ -260,7 +281,7 @@
   // void ** __cudaRegisterFatBinary(void *);
   llvm::Constant *RegisterFatbinFunc = CGM.CreateRuntimeFunction(
       llvm::FunctionType::get(VoidPtrPtrTy, VoidPtrTy, false),
-      "__cudaRegisterFatBinary");
+      addPrefixToNameBar(CGM, "RegisterFatBinary"));
   // struct { int magic, int version, void * gpu_binary, void * dont_care };
   llvm::StructType *FatbinWrapperTy =
       llvm::StructType::get(IntTy, IntTy, VoidPtrTy, VoidPtrTy);
@@ -279,7 +300,8 @@
 
   llvm::Function *ModuleCtorFunc = llvm::Function::Create(
       llvm::FunctionType::get(VoidTy, VoidPtrTy, false),
-      llvm::GlobalValue::InternalLinkage, "__cuda_module_ctor", &TheModule);
+      llvm::GlobalValue::InternalLinkage,
+      addPrefixToNameBar(CGM, "_module_ctor"), &TheModule);
   llvm::BasicBlock *CtorEntryBB =
       llvm::BasicBlock::Create(Context, "entry", ModuleCtorFunc);
   CGBuilderTy CtorBuilder(CGM, Context);
@@ -305,16 +327,18 @@
   // Unused in fatbin v1.
   Values.add(llvm::ConstantPointerNull::get(VoidPtrTy));
   llvm::GlobalVariable *FatbinWrapper = Values.finishAndCreateGlobal(
-      "__cuda_fatbin_wrapper", CGM.getPointerAlign(),
+      addPrefixToNameBar(CGM, "_fatbin_wrapper"), CGM.getPointerAlign(),
       /*constant*/ true);
   FatbinWrapper->setSection(FatbinSectionName);
 
   // GpuBinaryHandle = __cudaRegisterFatBinary(&FatbinWrapper);
   llvm::CallInst *RegisterFatbinCall = CtorBuilder.CreateCall(
       RegisterFatbinFunc, CtorBuilder.CreateBitCast(FatbinWrapper, VoidPtrTy));
   GpuBinaryHandle = new llvm::GlobalVariable(
       TheModule, VoidPtrPtrTy, false, llvm::GlobalValue::InternalLinkage,
-      llvm::ConstantPointerNull::get(VoidPtrPtrTy), "__cuda_gpubin_handle");
+      llvm::ConstantPointerNull::get(VoidPtrPtrTy),
+      addPrefixToNameBar(CGM, "_gpubin_handle"));
+
   CtorBuilder.CreateAlignedStore(RegisterFatbinCall, GpuBinaryHandle,
                                  CGM.getPointerAlign());
 
@@ -341,11 +365,13 @@
   // void __cudaUnregisterFatBinary(void ** handle);
   llvm::Constant *UnregisterFatbinFunc = CGM.CreateRuntimeFunction(
       llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false),
-      "__cudaUnregisterFatBinary");
+      addPrefixToNameBar(CGM, "UnregisterFatBinary"));
 
   llvm::Function *ModuleDtorFunc = llvm::Function::Create(
       llvm::FunctionType::get(VoidTy, VoidPtrTy, false),
-      llvm::GlobalValue::InternalLinkage, "__cuda_module_dtor", &TheModule);
+      llvm::GlobalValue::InternalLinkage,
+      addPrefixToNameBar(CGM, "_module_dtor"), &TheModule);
+
   llvm::BasicBlock *DtorEntryBB =
       llvm::BasicBlock::Create(Context, "entry", ModuleDtorFunc);
   CGBuilderTy DtorBuilder(CGM, Context);
Index: include/clang/Basic/LangOptions.def
===================================================================
--- include/clang/Basic/LangOptions.def
+++ include/clang/Basic/LangOptions.def
@@ -193,6 +193,7 @@
 LANGOPT(NativeHalfArgsAndReturns, 1, 0, "Native half args and returns")
 LANGOPT(HalfArgsAndReturns, 1, 0, "half args and returns")
 LANGOPT(CUDA              , 1, 0, "CUDA")
+LANGOPT(HIP               , 1, 0, "HIP")
 LANGOPT(OpenMP            , 32, 0, "OpenMP support and version of OpenMP (31, 40 or 45)")
 LANGOPT(OpenMPSimd        , 1, 0, "Use SIMD only OpenMP support.")
 LANGOPT(OpenMPUseTLS      , 1, 0, "Use TLS for threadprivates or runtime calls")