https://github.com/jhuber6 created 
https://github.com/llvm/llvm-project/pull/94751

Summary:
The utilities `nvptx-arch` and `amdgpu-arch` are used to support
`--offload-arch=native` among other utilities in clang. However, these
rely on the GPU drivers to query the features. In certain cases these
drivers can become locked up, which will lead to indefinate hangs on any
compiler jobs running in the meantime.

This patch adds a ten second timeout period for these utilities before
it kills the job and errors out.


>From a7bcd6b0568b00d0cac9bf0a6f9b17ca681425f2 Mon Sep 17 00:00:00 2001
From: Joseph Huber <hube...@outlook.com>
Date: Fri, 7 Jun 2024 08:15:06 -0500
Subject: [PATCH] [Clang] Add timeout for GPU detection utilities

Summary:
The utilities `nvptx-arch` and `amdgpu-arch` are used to support
`--offload-arch=native` among other utilities in clang. However, these
rely on the GPU drivers to query the features. In certain cases these
drivers can become locked up, which will lead to indefinate hangs on any
compiler jobs running in the meantime.

This patch adds a ten second timeout period for these utilities before
it kills the job and errors out.
---
 clang/include/clang/Driver/ToolChain.h | 2 +-
 clang/lib/Driver/ToolChain.cpp         | 8 ++++----
 clang/lib/Driver/ToolChains/AMDGPU.cpp | 2 +-
 clang/lib/Driver/ToolChains/Cuda.cpp   | 2 +-
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/clang/include/clang/Driver/ToolChain.h 
b/clang/include/clang/Driver/ToolChain.h
index a4f9cad98aa8b..87a5034dfd78b 100644
--- a/clang/include/clang/Driver/ToolChain.h
+++ b/clang/include/clang/Driver/ToolChain.h
@@ -205,7 +205,7 @@ class ToolChain {
 
   /// Executes the given \p Executable and returns the stdout.
   llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
-  executeToolChainProgram(StringRef Executable) const;
+  executeToolChainProgram(StringRef Executable, unsigned Timeout = 0) const;
 
   void setTripleEnvironment(llvm::Triple::EnvironmentType Env);
 
diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp
index 0e86bc07e0ea2..8c746ac8066cb 100644
--- a/clang/lib/Driver/ToolChain.cpp
+++ b/clang/lib/Driver/ToolChain.cpp
@@ -104,7 +104,8 @@ ToolChain::ToolChain(const Driver &D, const llvm::Triple &T,
 }
 
 llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
-ToolChain::executeToolChainProgram(StringRef Executable) const {
+ToolChain::executeToolChainProgram(StringRef Executable,
+                                   unsigned Timeout) const {
   llvm::SmallString<64> OutputFile;
   llvm::sys::fs::createTemporaryFile("toolchain-program", "txt", OutputFile);
   llvm::FileRemover OutputRemover(OutputFile.c_str());
@@ -115,9 +116,8 @@ ToolChain::executeToolChainProgram(StringRef Executable) 
const {
   };
 
   std::string ErrorMessage;
-  if (llvm::sys::ExecuteAndWait(Executable, {}, {}, Redirects,
-                                /* SecondsToWait */ 0,
-                                /*MemoryLimit*/ 0, &ErrorMessage))
+  if (llvm::sys::ExecuteAndWait(Executable, {}, {}, Redirects, Timeout,
+                                /*MemoryLimit=*/0, &ErrorMessage))
     return llvm::createStringError(std::error_code(),
                                    Executable + ": " + ErrorMessage);
 
diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp 
b/clang/lib/Driver/ToolChains/AMDGPU.cpp
index 9ffea57b005de..92895d8186e83 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -877,7 +877,7 @@ AMDGPUToolChain::getSystemGPUArchs(const ArgList &Args) 
const {
   else
     Program = GetProgramPath("amdgpu-arch");
 
-  auto StdoutOrErr = executeToolChainProgram(Program);
+  auto StdoutOrErr = executeToolChainProgram(Program, /*Timeout=*/10);
   if (!StdoutOrErr)
     return StdoutOrErr.takeError();
 
diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp 
b/clang/lib/Driver/ToolChains/Cuda.cpp
index bbc8be91fd70b..47dac0e439f10 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -826,7 +826,7 @@ NVPTXToolChain::getSystemGPUArchs(const ArgList &Args) 
const {
   else
     Program = GetProgramPath("nvptx-arch");
 
-  auto StdoutOrErr = executeToolChainProgram(Program);
+  auto StdoutOrErr = executeToolChainProgram(Program, /*Timeout=*/10);
   if (!StdoutOrErr)
     return StdoutOrErr.takeError();
 

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to