jlebar updated this revision to Diff 46293.
jlebar added a comment.

Address echristo's review comments.


http://reviews.llvm.org/D16664

Files:
  lib/CodeGen/CGCUDABuiltin.cpp
  test/CodeGenCUDA/printf.cu

Index: test/CodeGenCUDA/printf.cu
===================================================================
--- test/CodeGenCUDA/printf.cu
+++ test/CodeGenCUDA/printf.cu
@@ -10,9 +10,9 @@
 
 // Check a simple call to printf end-to-end.
 __device__ int CheckSimple() {
+  // CHECK: [[BUF:%[a-zA-Z0-9_]+]] = alloca i8, i32 4, align 4
   // CHECK: [[FMT:%[0-9]+]] = load{{.*}}%fmt
   const char* fmt = "%d";
-  // CHECK: [[BUF:%[a-zA-Z0-9_]+]] = alloca i8, i32 4, align 4
   // CHECK: [[PTR:%[0-9]+]] = getelementptr i8, i8* [[BUF]], i32 0
   // CHECK: [[CAST:%[0-9]+]] = bitcast i8* [[PTR]] to i32*
   // CHECK: store i32 42, i32* [[CAST]], align 4
@@ -51,3 +51,14 @@
   // CHECK: call i32 @vprintf({{.*}}, i8* null){{$}}
   printf("hello, world!");
 }
+
+// Check that printf's alloca happens in the entry block, not inside the if
+// statement.
+__device__ bool foo();
+__device__ void CheckAllocaIsInEntryBlock() {
+  // CHECK: alloca i8, i32 4, align 4
+  // CHECK: call {{.*}} @_Z3foov()
+  if (foo()) {
+    printf("%d", 42);
+  }
+}
Index: lib/CodeGen/CGCUDABuiltin.cpp
===================================================================
--- lib/CodeGen/CGCUDABuiltin.cpp
+++ lib/CodeGen/CGCUDABuiltin.cpp
@@ -102,9 +102,15 @@
     // If there are no args, pass a null pointer to vprintf.
     BufferPtr = llvm::ConstantPointerNull::get(llvm::Type::getInt8PtrTy(Ctx));
   } else {
-    BufferPtr = Builder.Insert(new llvm::AllocaInst(
+    // Insert our alloca not into the current BB, but into the function's entry
+    // block.  This is important because nvvm doesn't support alloca -- if we
+    // put the alloca anywhere else, llvm may eventually output
+    // stacksave/stackrestore intrinsics, which cause our nvvm backend to 
choke.
+    auto *Alloca = new llvm::AllocaInst(
         llvm::Type::getInt8Ty(Ctx), llvm::ConstantInt::get(Int32Ty, BufSize),
-        BufAlign, "printf_arg_buf"));
+        BufAlign, "printf_arg_buf");
+    Alloca->insertAfter(AllocaInsertPt);
+    BufferPtr = Alloca;
 
     unsigned Offset = 0;
     for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I) {


Index: test/CodeGenCUDA/printf.cu
===================================================================
--- test/CodeGenCUDA/printf.cu
+++ test/CodeGenCUDA/printf.cu
@@ -10,9 +10,9 @@
 
 // Check a simple call to printf end-to-end.
 __device__ int CheckSimple() {
+  // CHECK: [[BUF:%[a-zA-Z0-9_]+]] = alloca i8, i32 4, align 4
   // CHECK: [[FMT:%[0-9]+]] = load{{.*}}%fmt
   const char* fmt = "%d";
-  // CHECK: [[BUF:%[a-zA-Z0-9_]+]] = alloca i8, i32 4, align 4
   // CHECK: [[PTR:%[0-9]+]] = getelementptr i8, i8* [[BUF]], i32 0
   // CHECK: [[CAST:%[0-9]+]] = bitcast i8* [[PTR]] to i32*
   // CHECK: store i32 42, i32* [[CAST]], align 4
@@ -51,3 +51,14 @@
   // CHECK: call i32 @vprintf({{.*}}, i8* null){{$}}
   printf("hello, world!");
 }
+
+// Check that printf's alloca happens in the entry block, not inside the if
+// statement.
+__device__ bool foo();
+__device__ void CheckAllocaIsInEntryBlock() {
+  // CHECK: alloca i8, i32 4, align 4
+  // CHECK: call {{.*}} @_Z3foov()
+  if (foo()) {
+    printf("%d", 42);
+  }
+}
Index: lib/CodeGen/CGCUDABuiltin.cpp
===================================================================
--- lib/CodeGen/CGCUDABuiltin.cpp
+++ lib/CodeGen/CGCUDABuiltin.cpp
@@ -102,9 +102,15 @@
     // If there are no args, pass a null pointer to vprintf.
     BufferPtr = llvm::ConstantPointerNull::get(llvm::Type::getInt8PtrTy(Ctx));
   } else {
-    BufferPtr = Builder.Insert(new llvm::AllocaInst(
+    // Insert our alloca not into the current BB, but into the function's entry
+    // block.  This is important because nvvm doesn't support alloca -- if we
+    // put the alloca anywhere else, llvm may eventually output
+    // stacksave/stackrestore intrinsics, which cause our nvvm backend to choke.
+    auto *Alloca = new llvm::AllocaInst(
         llvm::Type::getInt8Ty(Ctx), llvm::ConstantInt::get(Int32Ty, BufSize),
-        BufAlign, "printf_arg_buf"));
+        BufAlign, "printf_arg_buf");
+    Alloca->insertAfter(AllocaInsertPt);
+    BufferPtr = Alloca;
 
     unsigned Offset = 0;
     for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I) {
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to