hliao created this revision. hliao added reviewers: tra, yaxunl, bogner. Herald added a project: clang. Herald added a subscriber: cfe-commits. hliao requested review of this revision.
- Skip generating profile data on `__global__` function in the host compilation. It's a host-side stub function only and don't have profile instrumentation generated on the real function body. The extra profile data results in the malformed instrumentation profile data. - Skip generating region mapping on functions in the wrong-side, i.e., + For the device compilation, skip host-only functions; and, + For the host compilation, skip device-only functions (including `__global__` functions.) - As the device-side profiling is not ready yet, only host-side profile code generation is checked. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D85276 Files: clang/lib/CodeGen/CodeGenPGO.cpp clang/test/CodeGenCUDA/profile-coverage-mapping.cu Index: clang/test/CodeGenCUDA/profile-coverage-mapping.cu =================================================================== --- /dev/null +++ clang/test/CodeGenCUDA/profile-coverage-mapping.cu @@ -0,0 +1,20 @@ +// RUN: echo "GPU binary would be here" > %t +// RUN: %clang_cc1 -fprofile-instrument=clang -triple x86_64-linux-gnu -target-sdk-version=8.0 -fcuda-include-gpubinary %t -emit-llvm -o - %s | FileCheck --check-prefix=PGOGEN %s +// RUN: %clang_cc1 -fprofile-instrument=clang -fcoverage-mapping -triple x86_64-linux-gnu -target-sdk-version=8.0 -fcuda-include-gpubinary %t -emit-llvm -o - %s | FileCheck --check-prefix=COVMAP %s +// RUN: %clang_cc1 -fprofile-instrument=clang -fcoverage-mapping -dump-coverage-mapping -triple x86_64-linux-gnu -target-sdk-version=8.0 -fcuda-include-gpubinary %t -emit-llvm-only -o - %s | FileCheck --check-prefix=MAPPING %s + +#include "Inputs/cuda.h" + +// PGOGEN-NOT: @__profn_{{.*kernel.*}} = +// COVMAP-COUNT-2: section "__llvm_covfun", comdat +// COVMAP-NOT: section "__llvm_covfun", comdat +// MAPPING-NOT: {{.*dfn.*}}: +// MAPPING-NOT: {{.*kernel.*}}: + +__device__ void dfn(int i) {} + +__global__ void kernel(int i) { dfn(i); } + +void host(void) { + kernel<<<1, 1>>>(1); +} Index: clang/lib/CodeGen/CodeGenPGO.cpp =================================================================== --- clang/lib/CodeGen/CodeGenPGO.cpp +++ clang/lib/CodeGen/CodeGenPGO.cpp @@ -773,6 +773,11 @@ if (!D->hasBody()) return; + // Skip CUDA/HIP kernel launch stub functions. + if (CGM.getLangOpts().CUDA && !CGM.getLangOpts().CUDAIsDevice && + D->hasAttr<CUDAGlobalAttr>()) + return; + bool InstrumentRegions = CGM.getCodeGenOpts().hasProfileClangInstr(); llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader(); if (!InstrumentRegions && !PGOReader) @@ -831,6 +836,16 @@ if (!D->getBody()) return true; + // Skip host-only functions in the CUDA device compilation and device-only + // functions in the host compilation. + if (CGM.getLangOpts().CUDA && + ((CGM.getLangOpts().CUDAIsDevice && !D->hasAttr<CUDADeviceAttr>() && + !D->hasAttr<CUDAGlobalAttr>()) || + (!CGM.getLangOpts().CUDAIsDevice && + (D->hasAttr<CUDAGlobalAttr>() || + (!D->hasAttr<CUDAHostAttr>() && D->hasAttr<CUDADeviceAttr>()))))) + return true; + // Don't map the functions in system headers. const auto &SM = CGM.getContext().getSourceManager(); auto Loc = D->getBody()->getBeginLoc();
Index: clang/test/CodeGenCUDA/profile-coverage-mapping.cu =================================================================== --- /dev/null +++ clang/test/CodeGenCUDA/profile-coverage-mapping.cu @@ -0,0 +1,20 @@ +// RUN: echo "GPU binary would be here" > %t +// RUN: %clang_cc1 -fprofile-instrument=clang -triple x86_64-linux-gnu -target-sdk-version=8.0 -fcuda-include-gpubinary %t -emit-llvm -o - %s | FileCheck --check-prefix=PGOGEN %s +// RUN: %clang_cc1 -fprofile-instrument=clang -fcoverage-mapping -triple x86_64-linux-gnu -target-sdk-version=8.0 -fcuda-include-gpubinary %t -emit-llvm -o - %s | FileCheck --check-prefix=COVMAP %s +// RUN: %clang_cc1 -fprofile-instrument=clang -fcoverage-mapping -dump-coverage-mapping -triple x86_64-linux-gnu -target-sdk-version=8.0 -fcuda-include-gpubinary %t -emit-llvm-only -o - %s | FileCheck --check-prefix=MAPPING %s + +#include "Inputs/cuda.h" + +// PGOGEN-NOT: @__profn_{{.*kernel.*}} = +// COVMAP-COUNT-2: section "__llvm_covfun", comdat +// COVMAP-NOT: section "__llvm_covfun", comdat +// MAPPING-NOT: {{.*dfn.*}}: +// MAPPING-NOT: {{.*kernel.*}}: + +__device__ void dfn(int i) {} + +__global__ void kernel(int i) { dfn(i); } + +void host(void) { + kernel<<<1, 1>>>(1); +} Index: clang/lib/CodeGen/CodeGenPGO.cpp =================================================================== --- clang/lib/CodeGen/CodeGenPGO.cpp +++ clang/lib/CodeGen/CodeGenPGO.cpp @@ -773,6 +773,11 @@ if (!D->hasBody()) return; + // Skip CUDA/HIP kernel launch stub functions. + if (CGM.getLangOpts().CUDA && !CGM.getLangOpts().CUDAIsDevice && + D->hasAttr<CUDAGlobalAttr>()) + return; + bool InstrumentRegions = CGM.getCodeGenOpts().hasProfileClangInstr(); llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader(); if (!InstrumentRegions && !PGOReader) @@ -831,6 +836,16 @@ if (!D->getBody()) return true; + // Skip host-only functions in the CUDA device compilation and device-only + // functions in the host compilation. + if (CGM.getLangOpts().CUDA && + ((CGM.getLangOpts().CUDAIsDevice && !D->hasAttr<CUDADeviceAttr>() && + !D->hasAttr<CUDAGlobalAttr>()) || + (!CGM.getLangOpts().CUDAIsDevice && + (D->hasAttr<CUDAGlobalAttr>() || + (!D->hasAttr<CUDAHostAttr>() && D->hasAttr<CUDADeviceAttr>()))))) + return true; + // Don't map the functions in system headers. const auto &SM = CGM.getContext().getSourceManager(); auto Loc = D->getBody()->getBeginLoc();
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits