llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang-driver

Author: Yaxun (Sam) Liu (yxsamliu)

<details>
<summary>Changes</summary>

[Driver] Enable -ftime-trace for CUDA/HIP device compilation

Previously, -ftime-trace only generated trace files for host compilation
when compiling CUDA/HIP code. Device compilation was excluded because
the OffloadingPrefix was non-empty, causing handleTimeTrace() to be
skipped.

This patch enables -ftime-trace for offload device compilation by:
1. Passing the offloading prefix to handleTimeTrace()
2. Including the bound architecture in the trace filename
3. Deriving the trace output directory from the -o option for device
   compilation (since the device output is a temp file)

Trace files are now generated for each offload target:
- Host: output.json
- Device: output-hip-amdgcn-amd-amdhsa-gfx906.json

Note: When using --save-temps, multiple compilation phases (preprocess,
compile, codegen) write to the same trace file, with each phase
overwriting the previous. This is pre-existing behavior that also
affects regular C++ compilation and is not addressed by this patch.

This addresses a long-standing limitation noted in D150282.



---
Full diff: https://github.com/llvm/llvm-project/pull/179701.diff


2 Files Affected:

- (modified) clang/lib/Driver/Driver.cpp (+42-10) 
- (modified) clang/test/Driver/ftime-trace.cpp (+35) 


``````````diff
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index eb3f9cbea2845..4df11efab5967 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -5821,7 +5821,8 @@ InputInfoList Driver::BuildJobsForAction(
 
 static void handleTimeTrace(Compilation &C, const ArgList &Args,
                             const JobAction *JA, const char *BaseInput,
-                            const InputInfo &Result) {
+                            const InputInfo &Result,
+                            StringRef OffloadingPrefix = "") {
   Arg *A =
       Args.getLastArg(options::OPT_ftime_trace, options::OPT_ftime_trace_EQ);
   if (!A)
@@ -5830,18 +5831,43 @@ static void handleTimeTrace(Compilation &C, const ArgList &Args,
   if (A->getOption().matches(options::OPT_ftime_trace_EQ)) {
     Path = A->getValue();
     if (llvm::sys::fs::is_directory(Path)) {
-      SmallString<128> Tmp(Result.getFilename());
-      llvm::sys::path::replace_extension(Tmp, "json");
-      llvm::sys::path::append(Path, llvm::sys::path::filename(Tmp));
+      // When -ftime-trace=<dir> and it's a directory:
+      // - For host/non-offload: use the output filename stem
+      // - For offload: use input filename stem + offloading prefix
+      SmallString<128> Tmp;
+      if (OffloadingPrefix.empty()) {
+        Tmp = llvm::sys::path::stem(Result.getFilename());
+      } else {
+        Tmp = llvm::sys::path::stem(BaseInput);
+        Tmp += OffloadingPrefix;
+      }
+      Tmp += ".json";
+      llvm::sys::path::append(Path, Tmp);
     }
   } else {
     if (Arg *DumpDir = Args.getLastArgNoClaim(options::OPT_dumpdir)) {
-      // The trace file is ${dumpdir}${basename}.json. Note that dumpdir may not
-      // end with a path separator.
+      // The trace file is ${dumpdir}${basename}${offloadprefix}.json. Note
+      // that dumpdir may not end with a path separator.
       Path = DumpDir->getValue();
-      Path += llvm::sys::path::filename(BaseInput);
+      Path += llvm::sys::path::stem(BaseInput);
+      Path += OffloadingPrefix;
+    } else if (!OffloadingPrefix.empty()) {
+      // For offloading, derive path from -o option or use current directory.
+      // The Result filename may be a temp file, so we use the -o output
+      // directory combined with the input filename and offload prefix.
+      if (Arg *FinalOutput = Args.getLastArg(options::OPT_o)) {
+        Path = llvm::sys::path::parent_path(FinalOutput->getValue());
+        if (!Path.empty())
+          Path += llvm::sys::path::get_separator();
+      }
+      Path += llvm::sys::path::stem(BaseInput);
+      Path += OffloadingPrefix;
     } else {
-      Path = Result.getFilename();
+      // Use the output filename stem for the trace file.
+      Path = llvm::sys::path::parent_path(Result.getFilename());
+      if (!Path.empty())
+        Path += llvm::sys::path::get_separator();
+      Path += llvm::sys::path::stem(Result.getFilename());
     }
     llvm::sys::path::replace_extension(Path, "json");
   }
@@ -6100,8 +6126,14 @@ InputInfoList Driver::BuildJobsForActionNoCache(
                                              AtTopLevel, MultipleArchs,
                                              OffloadingPrefix),
                        BaseInput);
-    if (T->canEmitIR() && OffloadingPrefix.empty())
-      handleTimeTrace(C, Args, JA, BaseInput, Result);
+    if (T->canEmitIR()) {
+      // For time trace, include the bound arch in the prefix to ensure unique
+      // trace files for each offload target.
+      std::string TimeTracePrefix = OffloadingPrefix;
+      if (!OffloadingPrefix.empty() && !BoundArch.empty())
+        TimeTracePrefix += "-" + BoundArch.str();
+      handleTimeTrace(C, Args, JA, BaseInput, Result, TimeTracePrefix);
+    }
   }
 
   if (CCCPrintBindings && !CCGenDiagnostics) {
diff --git a/clang/test/Driver/ftime-trace.cpp b/clang/test/Driver/ftime-trace.cpp
index 60c5885704b58..530d52482497a 100644
--- a/clang/test/Driver/ftime-trace.cpp
+++ b/clang/test/Driver/ftime-trace.cpp
@@ -63,6 +63,41 @@
 // UNUSED-NEXT: warning: argument unused during compilation: '-ftime-trace-verbose'
 // UNUSED-NOT:  warning:
 
+/// Test HIP offloading: -ftime-trace should generate traces for both host and device.
+// RUN: %clang -### -ftime-trace -ftime-trace-granularity=0 -x hip d/a.cpp --offload-arch=gfx906 --offload-arch=gfx90a \
+// RUN:   -c -o e/a.o --target=x86_64-linux-gnu 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=HIP
+// HIP-DAG: -cc1{{.*}} "-triple" "amdgcn-amd-amdhsa"{{.*}} "-ftime-trace=e/a-hip-amdgcn-amd-amdhsa-gfx906.json"
+// HIP-DAG: -cc1{{.*}} "-triple" "amdgcn-amd-amdhsa"{{.*}} "-ftime-trace=e/a-hip-amdgcn-amd-amdhsa-gfx90a.json"
+// HIP-DAG: -cc1{{.*}} "-triple" "x86_64{{.*}}"{{.*}} "-ftime-trace=e/a.json"
+
+/// Test HIP offloading with new driver: same output as above.
+// RUN: %clang -### -ftime-trace -ftime-trace-granularity=0 -x hip d/a.cpp --offload-arch=gfx906 --offload-arch=gfx90a \
+// RUN:   -c -o e/a.o --target=x86_64-linux-gnu --offload-new-driver 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=HIP
+
+/// Test HIP offloading with -ftime-trace=<dir>: traces go to specified directory.
+// RUN: %clang -### -ftime-trace=f -ftime-trace-granularity=0 -x hip d/a.cpp --offload-arch=gfx906 \
+// RUN:   -c -o e/a.o --target=x86_64-linux-gnu 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=HIP-DIR
+// HIP-DIR-DAG: -cc1{{.*}} "-triple" "amdgcn-amd-amdhsa"{{.*}} "-ftime-trace=f{{/|\\\\}}a-hip-amdgcn-amd-amdhsa-gfx906.json"
+// HIP-DIR-DAG: -cc1{{.*}} "-triple" "x86_64{{.*}}"{{.*}} "-ftime-trace=f{{/|\\\\}}a.json"
+
+/// Test HIP offloading with --save-temps: both host and device get unique trace files.
+// RUN: %clang -### -ftime-trace -ftime-trace-granularity=0 -x hip d/a.cpp --offload-arch=gfx906 \
+// RUN:   -c -o e/a.o --target=x86_64-linux-gnu --save-temps 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=HIP-SAVE-TEMPS
+// HIP-SAVE-TEMPS-DAG: -cc1{{.*}} "-triple" "amdgcn-amd-amdhsa"{{.*}} "-ftime-trace=e/a-hip-amdgcn-amd-amdhsa-gfx906.json"
+// HIP-SAVE-TEMPS-DAG: -cc1{{.*}} "-triple" "x86_64{{.*}}"{{.*}} "-ftime-trace=e/a-host-x86_64-unknown-linux-gnu.json"
+
+/// Test CUDA offloading: -ftime-trace should generate traces for both host and device.
+// RUN: %clang -### -ftime-trace -ftime-trace-granularity=0 -x cuda d/a.cpp --offload-arch=sm_70 --offload-arch=sm_80 \
+// RUN:   -c -o e/a.o --target=x86_64-linux-gnu --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CUDA
+// CUDA-DAG: -cc1{{.*}} "-triple" "nvptx64-nvidia-cuda"{{.*}} "-ftime-trace=e/a-cuda-nvptx64-nvidia-cuda-sm_70.json"
+// CUDA-DAG: -cc1{{.*}} "-triple" "nvptx64-nvidia-cuda"{{.*}} "-ftime-trace=e/a-cuda-nvptx64-nvidia-cuda-sm_80.json"
+// CUDA-DAG: -cc1{{.*}} "-triple" "x86_64{{.*}}"{{.*}} "-ftime-trace=e/a.json"
+
 template <typename T>
 struct Struct {
   T Num;

``````````

</details>


https://github.com/llvm/llvm-project/pull/179701
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to