hliao created this revision.
hliao added reviewers: tra, yaxunl, bogner.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.
hliao requested review of this revision.

- Skip generating profile data on `__global__` function in the host 
compilation. It's a host-side stub function only and don't have profile 
instrumentation generated on the real function body. The extra profile data 
results in the malformed instrumentation profile data.
- Skip generating region mapping on functions in the wrong-side, i.e., + For 
the device compilation, skip host-only functions; and, + For the host 
compilation, skip device-only functions (including `__global__` functions.)
- As the device-side profiling is not ready yet, only host-side profile code 
generation is checked.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D85276

Files:
  clang/lib/CodeGen/CodeGenPGO.cpp
  clang/test/CodeGenCUDA/profile-coverage-mapping.cu


Index: clang/test/CodeGenCUDA/profile-coverage-mapping.cu
===================================================================
--- /dev/null
+++ clang/test/CodeGenCUDA/profile-coverage-mapping.cu
@@ -0,0 +1,20 @@
+// RUN: echo "GPU binary would be here" > %t
+// RUN: %clang_cc1 -fprofile-instrument=clang -triple x86_64-linux-gnu 
-target-sdk-version=8.0 -fcuda-include-gpubinary %t -emit-llvm -o - %s | 
FileCheck --check-prefix=PGOGEN %s
+// RUN: %clang_cc1 -fprofile-instrument=clang -fcoverage-mapping -triple 
x86_64-linux-gnu -target-sdk-version=8.0 -fcuda-include-gpubinary %t -emit-llvm 
-o - %s | FileCheck --check-prefix=COVMAP %s
+// RUN: %clang_cc1 -fprofile-instrument=clang -fcoverage-mapping 
-dump-coverage-mapping -triple x86_64-linux-gnu -target-sdk-version=8.0 
-fcuda-include-gpubinary %t -emit-llvm-only -o - %s | FileCheck 
--check-prefix=MAPPING %s
+
+#include "Inputs/cuda.h"
+
+// PGOGEN-NOT: @__profn_{{.*kernel.*}} =
+// COVMAP-COUNT-2: section "__llvm_covfun", comdat
+// COVMAP-NOT: section "__llvm_covfun", comdat
+// MAPPING-NOT: {{.*dfn.*}}:
+// MAPPING-NOT: {{.*kernel.*}}:
+
+__device__ void dfn(int i) {}
+
+__global__ void kernel(int i) { dfn(i); }
+
+void host(void) {
+  kernel<<<1, 1>>>(1);
+}
Index: clang/lib/CodeGen/CodeGenPGO.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenPGO.cpp
+++ clang/lib/CodeGen/CodeGenPGO.cpp
@@ -773,6 +773,11 @@
   if (!D->hasBody())
     return;
 
+  // Skip CUDA/HIP kernel launch stub functions.
+  if (CGM.getLangOpts().CUDA && !CGM.getLangOpts().CUDAIsDevice &&
+      D->hasAttr<CUDAGlobalAttr>())
+    return;
+
   bool InstrumentRegions = CGM.getCodeGenOpts().hasProfileClangInstr();
   llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
   if (!InstrumentRegions && !PGOReader)
@@ -831,6 +836,16 @@
   if (!D->getBody())
     return true;
 
+  // Skip host-only functions in the CUDA device compilation and device-only
+  // functions in the host compilation.
+  if (CGM.getLangOpts().CUDA &&
+      ((CGM.getLangOpts().CUDAIsDevice && !D->hasAttr<CUDADeviceAttr>() &&
+        !D->hasAttr<CUDAGlobalAttr>()) ||
+       (!CGM.getLangOpts().CUDAIsDevice &&
+        (D->hasAttr<CUDAGlobalAttr>() ||
+         (!D->hasAttr<CUDAHostAttr>() && D->hasAttr<CUDADeviceAttr>())))))
+    return true;
+
   // Don't map the functions in system headers.
   const auto &SM = CGM.getContext().getSourceManager();
   auto Loc = D->getBody()->getBeginLoc();


Index: clang/test/CodeGenCUDA/profile-coverage-mapping.cu
===================================================================
--- /dev/null
+++ clang/test/CodeGenCUDA/profile-coverage-mapping.cu
@@ -0,0 +1,20 @@
+// RUN: echo "GPU binary would be here" > %t
+// RUN: %clang_cc1 -fprofile-instrument=clang -triple x86_64-linux-gnu -target-sdk-version=8.0 -fcuda-include-gpubinary %t -emit-llvm -o - %s | FileCheck --check-prefix=PGOGEN %s
+// RUN: %clang_cc1 -fprofile-instrument=clang -fcoverage-mapping -triple x86_64-linux-gnu -target-sdk-version=8.0 -fcuda-include-gpubinary %t -emit-llvm -o - %s | FileCheck --check-prefix=COVMAP %s
+// RUN: %clang_cc1 -fprofile-instrument=clang -fcoverage-mapping -dump-coverage-mapping -triple x86_64-linux-gnu -target-sdk-version=8.0 -fcuda-include-gpubinary %t -emit-llvm-only -o - %s | FileCheck --check-prefix=MAPPING %s
+
+#include "Inputs/cuda.h"
+
+// PGOGEN-NOT: @__profn_{{.*kernel.*}} =
+// COVMAP-COUNT-2: section "__llvm_covfun", comdat
+// COVMAP-NOT: section "__llvm_covfun", comdat
+// MAPPING-NOT: {{.*dfn.*}}:
+// MAPPING-NOT: {{.*kernel.*}}:
+
+__device__ void dfn(int i) {}
+
+__global__ void kernel(int i) { dfn(i); }
+
+void host(void) {
+  kernel<<<1, 1>>>(1);
+}
Index: clang/lib/CodeGen/CodeGenPGO.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenPGO.cpp
+++ clang/lib/CodeGen/CodeGenPGO.cpp
@@ -773,6 +773,11 @@
   if (!D->hasBody())
     return;
 
+  // Skip CUDA/HIP kernel launch stub functions.
+  if (CGM.getLangOpts().CUDA && !CGM.getLangOpts().CUDAIsDevice &&
+      D->hasAttr<CUDAGlobalAttr>())
+    return;
+
   bool InstrumentRegions = CGM.getCodeGenOpts().hasProfileClangInstr();
   llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
   if (!InstrumentRegions && !PGOReader)
@@ -831,6 +836,16 @@
   if (!D->getBody())
     return true;
 
+  // Skip host-only functions in the CUDA device compilation and device-only
+  // functions in the host compilation.
+  if (CGM.getLangOpts().CUDA &&
+      ((CGM.getLangOpts().CUDAIsDevice && !D->hasAttr<CUDADeviceAttr>() &&
+        !D->hasAttr<CUDAGlobalAttr>()) ||
+       (!CGM.getLangOpts().CUDAIsDevice &&
+        (D->hasAttr<CUDAGlobalAttr>() ||
+         (!D->hasAttr<CUDAHostAttr>() && D->hasAttr<CUDADeviceAttr>())))))
+    return true;
+
   // Don't map the functions in system headers.
   const auto &SM = CGM.getContext().getSourceManager();
   auto Loc = D->getBody()->getBeginLoc();
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to