[PATCH] D16664: [CUDA] Generate CUDA's printf alloca in its function's entry block.

Justin Lebar via cfe-commits Wed, 27 Jan 2016 18:24:07 -0800

jlebar created this revision.
jlebar added a reviewer: rnk.
jlebar added subscribers: tra, echristo, jhen, cfe-commits.


This is necessary to prevent llvm from generating stacksave intrinsics
around this alloca.  NVVM doesn't have a stack, and we don't handle said
intrinsics.

I'm not sure if appending the alloca to the beginning of the entry block is
right.  Adding it to the end would make more sense to me, but then I'm not sure
how to ensure I'm not clobbering the terminator (except by always assuming the
BB is nonempty and inserting right before BB.back()?).

http://reviews.llvm.org/D16664

Files:
  lib/CodeGen/CGCUDABuiltin.cpp
  test/CodeGenCUDA/printf.cu

Index: test/CodeGenCUDA/printf.cu
===================================================================
--- test/CodeGenCUDA/printf.cu
+++ test/CodeGenCUDA/printf.cu
@@ -51,3 +51,14 @@
   // CHECK: call i32 @vprintf({{.*}}, i8* null){{$}}
   printf("hello, world!");
 }
+
+// Check that printf's alloca happens in the entry block, not inside the if
+// statement.
+__device__ bool foo();
+__device__ void CheckAllocaIsInEntryBlock() {
+  // CHECK: alloca i8, i32 4 align 4
+  // CHECK: call @_Z3foov()
+  if (foo()) {
+    printf("%d", 42);
+  }
+}
Index: lib/CodeGen/CGCUDABuiltin.cpp
===================================================================
--- lib/CodeGen/CGCUDABuiltin.cpp
+++ lib/CodeGen/CGCUDABuiltin.cpp
@@ -102,9 +102,15 @@
     // If there are no args, pass a null pointer to vprintf.
     BufferPtr = llvm::ConstantPointerNull::get(llvm::Type::getInt8PtrTy(Ctx));
   } else {
-    BufferPtr = Builder.Insert(new llvm::AllocaInst(
+    // Insert our alloca not into the current BB, but into the function's entry
+    // block.  This is important because nvvm doesn't support alloca -- if we
+    // put the alloca anywhere else, llvm may eventually output
+    // stacksave/stackrestore intrinsics, which cause ptxas to choke.
+    auto *Alloca = new llvm::AllocaInst(
         llvm::Type::getInt8Ty(Ctx), llvm::ConstantInt::get(Int32Ty, BufSize),
-        BufAlign, "printf_arg_buf"));
+        BufAlign, "printf_arg_buf");
+    CurFn->getEntryBlock().getInstList().push_front(Alloca);
+    BufferPtr = Alloca;
 
     unsigned Offset = 0;
     for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I) {

Index: test/CodeGenCUDA/printf.cu
===================================================================
--- test/CodeGenCUDA/printf.cu
+++ test/CodeGenCUDA/printf.cu
@@ -51,3 +51,14 @@
   // CHECK: call i32 @vprintf({{.*}}, i8* null){{$}}
   printf("hello, world!");
 }
+
+// Check that printf's alloca happens in the entry block, not inside the if
+// statement.
+__device__ bool foo();
+__device__ void CheckAllocaIsInEntryBlock() {
+  // CHECK: alloca i8, i32 4 align 4
+  // CHECK: call @_Z3foov()
+  if (foo()) {
+    printf("%d", 42);
+  }
+}
Index: lib/CodeGen/CGCUDABuiltin.cpp
===================================================================
--- lib/CodeGen/CGCUDABuiltin.cpp
+++ lib/CodeGen/CGCUDABuiltin.cpp
@@ -102,9 +102,15 @@
     // If there are no args, pass a null pointer to vprintf.
     BufferPtr = llvm::ConstantPointerNull::get(llvm::Type::getInt8PtrTy(Ctx));
   } else {
-    BufferPtr = Builder.Insert(new llvm::AllocaInst(
+    // Insert our alloca not into the current BB, but into the function's entry
+    // block.  This is important because nvvm doesn't support alloca -- if we
+    // put the alloca anywhere else, llvm may eventually output
+    // stacksave/stackrestore intrinsics, which cause ptxas to choke.
+    auto *Alloca = new llvm::AllocaInst(
         llvm::Type::getInt8Ty(Ctx), llvm::ConstantInt::get(Int32Ty, BufSize),
-        BufAlign, "printf_arg_buf"));
+        BufAlign, "printf_arg_buf");
+    CurFn->getEntryBlock().getInstList().push_front(Alloca);
+    BufferPtr = Alloca;
 
     unsigned Offset = 0;
     for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I) {

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D16664: [CUDA] Generate CUDA's printf alloca in its function's entry block.

Reply via email to