Author: Joseph Huber
Date: 2026-06-18T10:27:26-05:00
New Revision: 154115307ba171489125550ece373302f0ab01e1

URL: 
https://github.com/llvm/llvm-project/commit/154115307ba171489125550ece373302f0ab01e1
DIFF: 
https://github.com/llvm/llvm-project/commit/154115307ba171489125550ece373302f0ab01e1.diff

LOG: [HIP] Remove default `-flto-partitions=8` in the HIP toolchain (#203948)

Summary:
This was added and made it into a release, but it never should've been a
default argument. Partitioning the LTO is a fundamentally different
compilation model and has real impacts on the generated code. Right now
it is added silently, which breaks non-Hostcall printf and degrades
performance due to split uselists.

This is a contract that should not be made default. "Compile times" is
not a justification to silently change compilation semantics; that is
the user's build system's job. Partitioning to a magic number is not an
appropriate solution when passing `-flto-partitions=8` or `-Xarch_device
-flto-partitions=8` is perfectly viable and not hidden from the user.

This resolves the 12% performance regression observed when switching to
the LTO toolchain in HIP for dcsrgemm.

Added: 
    

Modified: 
    clang/lib/Driver/ToolChains/HIPAMD.cpp
    clang/test/Driver/hip-toolchain-rdc-static-lib.hip
    clang/test/Driver/hip-toolchain-rdc.hip

Removed: 
    


################################################################################
diff  --git a/clang/lib/Driver/ToolChains/HIPAMD.cpp 
b/clang/lib/Driver/ToolChains/HIPAMD.cpp
index 48d210b82784c..e48c28408dc67 100644
--- a/clang/lib/Driver/ToolChains/HIPAMD.cpp
+++ b/clang/lib/Driver/ToolChains/HIPAMD.cpp
@@ -294,12 +294,6 @@ HIPAMDToolChain::TranslateArgs(const 
llvm::opt::DerivedArgList &Args,
   llvm::opt::DerivedArgList *DAL =
       ROCMToolChain::TranslateArgs(Args, BoundArch, DeviceOffloadKind);
 
-  if (!Args.hasArg(options::OPT_flto_partitions_EQ)) {
-    const OptTable &Opts = getDriver().getOpts();
-    DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_flto_partitions_EQ),
-                      "8");
-  }
-
   return DAL;
 }
 

diff  --git a/clang/test/Driver/hip-toolchain-rdc-static-lib.hip 
b/clang/test/Driver/hip-toolchain-rdc-static-lib.hip
index 32a45296fcd96..71ac901b69299 100644
--- a/clang/test/Driver/hip-toolchain-rdc-static-lib.hip
+++ b/clang/test/Driver/hip-toolchain-rdc-static-lib.hip
@@ -2,6 +2,7 @@
 // RUN:   -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
 // RUN:   --no-offload-new-driver --emit-static-lib -nogpulib \
 // RUN:   -fuse-ld=lld -B%S/Inputs/lld -fgpu-rdc -nogpuinc \
+// RUN:   -flto-partitions=8 \
 // RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
 // RUN:   %S/Inputs/hip_multiple_inputs/b.hip \
 // RUN: 2>&1 | FileCheck %s

diff  --git a/clang/test/Driver/hip-toolchain-rdc.hip 
b/clang/test/Driver/hip-toolchain-rdc.hip
index 414561a8eadb1..7e100c521441d 100644
--- a/clang/test/Driver/hip-toolchain-rdc.hip
+++ b/clang/test/Driver/hip-toolchain-rdc.hip
@@ -5,6 +5,7 @@
 // RUN:   --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib2 \
 // RUN:   -fuse-ld=lld -B%S/Inputs/lld -fgpu-rdc -nogpuinc \
 // RUN:   --no-offload-new-driver -fhip-dump-offload-linker-script \
+// RUN:   -flto-partitions=8 \
 // RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
 // RUN:   %S/Inputs/hip_multiple_inputs/b.hip \
 // RUN: 2>&1 | FileCheck -check-prefixes=CHECK,LNX %s
@@ -16,6 +17,7 @@
 // RUN:   --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib2 \
 // RUN:   -fuse-ld=lld -B%S/Inputs/lld -fgpu-rdc -nogpuinc \
 // RUN:   --no-offload-new-driver -fhip-dump-offload-linker-script \
+// RUN:   -flto-partitions=8 \
 // RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
 // RUN:   %S/Inputs/hip_multiple_inputs/b.hip \
 // RUN: 2>&1 | FileCheck -check-prefixes=CHECK,MSVC %s
@@ -170,11 +172,13 @@
 
 // RUN: %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc 
--no-offload-new-driver \
 // RUN:   -L. -foffload-lto %s 2>&1 | FileCheck -check-prefix=LTO_DEFAULT %s
-// LTO_DEFAULT: lld{{.*}}"--lto-partitions=8"
+// LTO_DEFAULT: lld
+// LTO_DEFAULT-NOT: "--lto-partitions=
 
 // RUN: %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc 
--offload-new-driver \
 // RUN:   -L. -foffload-lto %s 2>&1 | FileCheck -check-prefix=LTO_DEFAULT_NEW 
%s
-// LTO_DEFAULT_NEW: 
clang-linker-wrapper{{.*}}"--device-compiler=amdgcn-amd-amdhsa=-flto-partitions=8"
+// LTO_DEFAULT_NEW: clang-linker-wrapper
+// LTO_DEFAULT_NEW-NOT: "--device-compiler=amdgcn-amd-amdhsa=-flto-partitions=
 
 // RUN: %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc 
--no-offload-new-driver \
 // RUN:   -L. -foffload-lto -flto-partitions=42 %s 2>&1 | FileCheck 
-check-prefix=LTO_PARTS %s


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to