https://github.com/yxsamliu created https://github.com/llvm/llvm-project/pull/179701
[Driver] Enable -ftime-trace for CUDA/HIP device compilation Previously, -ftime-trace only generated trace files for host compilation when compiling CUDA/HIP code. Device compilation was excluded because the OffloadingPrefix was non-empty, causing handleTimeTrace() to be skipped. This patch enables -ftime-trace for offload device compilation by: 1. Passing the offloading prefix to handleTimeTrace() 2. Including the bound architecture in the trace filename 3. Deriving the trace output directory from the -o option for device compilation (since the device output is a temp file) Trace files are now generated for each offload target: - Host: output.json - Device: output-hip-amdgcn-amd-amdhsa-gfx906.json Note: When using --save-temps, multiple compilation phases (preprocess, compile, codegen) write to the same trace file, with each phase overwriting the previous. This is pre-existing behavior that also affects regular C++ compilation and is not addressed by this patch. This addresses a long-standing limitation noted in D150282. >From 0548ff5a891c047765429614d6a8c3ee266d3fff Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" <[email protected]> Date: Wed, 4 Feb 2026 09:50:54 -0500 Subject: [PATCH] [Driver] Enable -ftime-trace for CUDA/HIP device compilation Previously, -ftime-trace only generated trace files for host compilation when compiling CUDA/HIP code. Device compilation was excluded because the OffloadingPrefix was non-empty, causing handleTimeTrace() to be skipped. This patch enables -ftime-trace for offload device compilation by: 1. Passing the offloading prefix to handleTimeTrace() 2. Including the bound architecture in the trace filename 3. Deriving the trace output directory from the -o option for device compilation (since the device output is a temp file) Trace files are now generated for each offload target: - Host: output.json - Device: output-hip-amdgcn-amd-amdhsa-gfx906.json Note: When using --save-temps, multiple compilation phases (preprocess, compile, codegen) write to the same trace file, with each phase overwriting the previous. This is pre-existing behavior that also affects regular C++ compilation and is not addressed by this patch. This addresses a long-standing limitation noted in D150282. --- clang/lib/Driver/Driver.cpp | 52 +++++++++++++++++++++++++------ clang/test/Driver/ftime-trace.cpp | 35 +++++++++++++++++++++ 2 files changed, 77 insertions(+), 10 deletions(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index eb3f9cbea2845..4df11efab5967 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -5821,7 +5821,8 @@ InputInfoList Driver::BuildJobsForAction( static void handleTimeTrace(Compilation &C, const ArgList &Args, const JobAction *JA, const char *BaseInput, - const InputInfo &Result) { + const InputInfo &Result, + StringRef OffloadingPrefix = "") { Arg *A = Args.getLastArg(options::OPT_ftime_trace, options::OPT_ftime_trace_EQ); if (!A) @@ -5830,18 +5831,43 @@ static void handleTimeTrace(Compilation &C, const ArgList &Args, if (A->getOption().matches(options::OPT_ftime_trace_EQ)) { Path = A->getValue(); if (llvm::sys::fs::is_directory(Path)) { - SmallString<128> Tmp(Result.getFilename()); - llvm::sys::path::replace_extension(Tmp, "json"); - llvm::sys::path::append(Path, llvm::sys::path::filename(Tmp)); + // When -ftime-trace=<dir> and it's a directory: + // - For host/non-offload: use the output filename stem + // - For offload: use input filename stem + offloading prefix + SmallString<128> Tmp; + if (OffloadingPrefix.empty()) { + Tmp = llvm::sys::path::stem(Result.getFilename()); + } else { + Tmp = llvm::sys::path::stem(BaseInput); + Tmp += OffloadingPrefix; + } + Tmp += ".json"; + llvm::sys::path::append(Path, Tmp); } } else { if (Arg *DumpDir = Args.getLastArgNoClaim(options::OPT_dumpdir)) { - // The trace file is ${dumpdir}${basename}.json. Note that dumpdir may not - // end with a path separator. + // The trace file is ${dumpdir}${basename}${offloadprefix}.json. Note + // that dumpdir may not end with a path separator. Path = DumpDir->getValue(); - Path += llvm::sys::path::filename(BaseInput); + Path += llvm::sys::path::stem(BaseInput); + Path += OffloadingPrefix; + } else if (!OffloadingPrefix.empty()) { + // For offloading, derive path from -o option or use current directory. + // The Result filename may be a temp file, so we use the -o output + // directory combined with the input filename and offload prefix. + if (Arg *FinalOutput = Args.getLastArg(options::OPT_o)) { + Path = llvm::sys::path::parent_path(FinalOutput->getValue()); + if (!Path.empty()) + Path += llvm::sys::path::get_separator(); + } + Path += llvm::sys::path::stem(BaseInput); + Path += OffloadingPrefix; } else { - Path = Result.getFilename(); + // Use the output filename stem for the trace file. + Path = llvm::sys::path::parent_path(Result.getFilename()); + if (!Path.empty()) + Path += llvm::sys::path::get_separator(); + Path += llvm::sys::path::stem(Result.getFilename()); } llvm::sys::path::replace_extension(Path, "json"); } @@ -6100,8 +6126,14 @@ InputInfoList Driver::BuildJobsForActionNoCache( AtTopLevel, MultipleArchs, OffloadingPrefix), BaseInput); - if (T->canEmitIR() && OffloadingPrefix.empty()) - handleTimeTrace(C, Args, JA, BaseInput, Result); + if (T->canEmitIR()) { + // For time trace, include the bound arch in the prefix to ensure unique + // trace files for each offload target. + std::string TimeTracePrefix = OffloadingPrefix; + if (!OffloadingPrefix.empty() && !BoundArch.empty()) + TimeTracePrefix += "-" + BoundArch.str(); + handleTimeTrace(C, Args, JA, BaseInput, Result, TimeTracePrefix); + } } if (CCCPrintBindings && !CCGenDiagnostics) { diff --git a/clang/test/Driver/ftime-trace.cpp b/clang/test/Driver/ftime-trace.cpp index 60c5885704b58..530d52482497a 100644 --- a/clang/test/Driver/ftime-trace.cpp +++ b/clang/test/Driver/ftime-trace.cpp @@ -63,6 +63,41 @@ // UNUSED-NEXT: warning: argument unused during compilation: '-ftime-trace-verbose' // UNUSED-NOT: warning: +/// Test HIP offloading: -ftime-trace should generate traces for both host and device. +// RUN: %clang -### -ftime-trace -ftime-trace-granularity=0 -x hip d/a.cpp --offload-arch=gfx906 --offload-arch=gfx90a \ +// RUN: -c -o e/a.o --target=x86_64-linux-gnu 2>&1 \ +// RUN: | FileCheck %s --check-prefix=HIP +// HIP-DAG: -cc1{{.*}} "-triple" "amdgcn-amd-amdhsa"{{.*}} "-ftime-trace=e/a-hip-amdgcn-amd-amdhsa-gfx906.json" +// HIP-DAG: -cc1{{.*}} "-triple" "amdgcn-amd-amdhsa"{{.*}} "-ftime-trace=e/a-hip-amdgcn-amd-amdhsa-gfx90a.json" +// HIP-DAG: -cc1{{.*}} "-triple" "x86_64{{.*}}"{{.*}} "-ftime-trace=e/a.json" + +/// Test HIP offloading with new driver: same output as above. +// RUN: %clang -### -ftime-trace -ftime-trace-granularity=0 -x hip d/a.cpp --offload-arch=gfx906 --offload-arch=gfx90a \ +// RUN: -c -o e/a.o --target=x86_64-linux-gnu --offload-new-driver 2>&1 \ +// RUN: | FileCheck %s --check-prefix=HIP + +/// Test HIP offloading with -ftime-trace=<dir>: traces go to specified directory. +// RUN: %clang -### -ftime-trace=f -ftime-trace-granularity=0 -x hip d/a.cpp --offload-arch=gfx906 \ +// RUN: -c -o e/a.o --target=x86_64-linux-gnu 2>&1 \ +// RUN: | FileCheck %s --check-prefix=HIP-DIR +// HIP-DIR-DAG: -cc1{{.*}} "-triple" "amdgcn-amd-amdhsa"{{.*}} "-ftime-trace=f{{/|\\\\}}a-hip-amdgcn-amd-amdhsa-gfx906.json" +// HIP-DIR-DAG: -cc1{{.*}} "-triple" "x86_64{{.*}}"{{.*}} "-ftime-trace=f{{/|\\\\}}a.json" + +/// Test HIP offloading with --save-temps: both host and device get unique trace files. +// RUN: %clang -### -ftime-trace -ftime-trace-granularity=0 -x hip d/a.cpp --offload-arch=gfx906 \ +// RUN: -c -o e/a.o --target=x86_64-linux-gnu --save-temps 2>&1 \ +// RUN: | FileCheck %s --check-prefix=HIP-SAVE-TEMPS +// HIP-SAVE-TEMPS-DAG: -cc1{{.*}} "-triple" "amdgcn-amd-amdhsa"{{.*}} "-ftime-trace=e/a-hip-amdgcn-amd-amdhsa-gfx906.json" +// HIP-SAVE-TEMPS-DAG: -cc1{{.*}} "-triple" "x86_64{{.*}}"{{.*}} "-ftime-trace=e/a-host-x86_64-unknown-linux-gnu.json" + +/// Test CUDA offloading: -ftime-trace should generate traces for both host and device. +// RUN: %clang -### -ftime-trace -ftime-trace-granularity=0 -x cuda d/a.cpp --offload-arch=sm_70 --offload-arch=sm_80 \ +// RUN: -c -o e/a.o --target=x86_64-linux-gnu --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CUDA +// CUDA-DAG: -cc1{{.*}} "-triple" "nvptx64-nvidia-cuda"{{.*}} "-ftime-trace=e/a-cuda-nvptx64-nvidia-cuda-sm_70.json" +// CUDA-DAG: -cc1{{.*}} "-triple" "nvptx64-nvidia-cuda"{{.*}} "-ftime-trace=e/a-cuda-nvptx64-nvidia-cuda-sm_80.json" +// CUDA-DAG: -cc1{{.*}} "-triple" "x86_64{{.*}}"{{.*}} "-ftime-trace=e/a.json" + template <typename T> struct Struct { T Num; _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
