Author: David Sherwood Date: 2021-10-19T14:57:51+01:00 New Revision: 607fb1bb8c91a2f284d8c63f3066ab8cc1a66955
URL: https://github.com/llvm/llvm-project/commit/607fb1bb8c91a2f284d8c63f3066ab8cc1a66955 DIFF: https://github.com/llvm/llvm-project/commit/607fb1bb8c91a2f284d8c63f3066ab8cc1a66955.diff LOG: [AArch64] Always add -tune-cpu argument to -cc1 driver This patch ensures that we always tune for a given CPU on AArch64 targets when the user specifies the "-mtune=xyz" flag. In the AArch64Subtarget if the tune flag is unset we use the CPU value instead. I've updated the release notes here: llvm/docs/ReleaseNotes.rst and added tests here: clang/test/Driver/aarch64-mtune.c Differential Revision: https://reviews.llvm.org/D110258 Added: clang/test/Driver/aarch64-mtune.c Modified: clang/docs/ReleaseNotes.rst clang/lib/Driver/ToolChains/Clang.cpp llvm/docs/ReleaseNotes.rst llvm/lib/Target/AArch64/AArch64Subtarget.cpp llvm/lib/Target/AArch64/AArch64Subtarget.h llvm/lib/Target/AArch64/AArch64TargetMachine.cpp llvm/unittests/Target/AArch64/InstSizes.cpp llvm/unittests/Target/AArch64/MatrixRegisterAliasing.cpp Removed: ################################################################################ diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 27ff9ddc70a34..05bd9cfea3fa5 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -192,6 +192,13 @@ Arm and AArch64 Support in Clang - Support has been added for the following processors (command-line identifiers in parentheses): - Arm Cortex-A510 (``cortex-a510``) +- The -mtune flag is no longer ignored for AArch64. It is now possible to +tune code generation for a particular CPU with -mtune without setting any +architectural features. For example, compiling with +"-mcpu=generic -mtune=cortex-a57" will not enable any Cortex-A57 specific +architecture features, but will enable certain optimizations specific to +Cortex-A57 CPUs and enable the use of a more accurate scheduling model. + Internal API Changes -------------------- diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 316c6026adf5c..68b6950364583 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -1833,6 +1833,21 @@ void Clang::AddAArch64TargetArgs(const ArgList &Args, } AddAAPCSVolatileBitfieldArgs(Args, CmdArgs); + + if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_mtune_EQ)) { + StringRef Name = A->getValue(); + + std::string TuneCPU; + if (Name == "native") + TuneCPU = std::string(llvm::sys::getHostCPUName()); + else + TuneCPU = std::string(Name); + + if (!TuneCPU.empty()) { + CmdArgs.push_back("-tune-cpu"); + CmdArgs.push_back(Args.MakeArgString(TuneCPU)); + } + } } void Clang::AddMIPSTargetArgs(const ArgList &Args, diff --git a/clang/test/Driver/aarch64-mtune.c b/clang/test/Driver/aarch64-mtune.c new file mode 100644 index 0000000000000..ae41f4a9983cd --- /dev/null +++ b/clang/test/Driver/aarch64-mtune.c @@ -0,0 +1,42 @@ +// Ensure we support the -mtune flag. + +// There shouldn't be a default -mtune. +// RUN: %clang -target aarch64-unknown-unknown -c -### %s 2>&1 \ +// RUN: | FileCheck %s -check-prefix=NOTUNE +// NOTUNE-NOT: "-tune-cpu" "generic" + +// RUN: %clang -target aarch64-unknown-unknown -c -### %s -mtune=generic 2>&1 \ +// RUN: | FileCheck %s -check-prefix=GENERIC +// GENERIC: "-tune-cpu" "generic" + +// RUN: %clang -target aarch64-unknown-unknown -c -### %s -mtune=neoverse-n1 2>&1 \ +// RUN: | FileCheck %s -check-prefix=NEOVERSE-N1 +// NEOVERSE-N1: "-tune-cpu" "neoverse-n1" + +// RUN: %clang -target aarch64-unknown-unknown -c -### %s -mtune=thunderx2t99 2>&1 \ +// RUN: | FileCheck %s -check-prefix=THUNDERX2T99 +// THUNDERX2T99: "-tune-cpu" "thunderx2t99" + +// Check interaction between march and mtune. + +// RUN: %clang -target aarch64-unknown-unknown -c -### %s -march=armv8-a 2>&1 \ +// RUN: | FileCheck %s -check-prefix=MARCHARMV8A +// MARCHARMV8A: "-target-cpu" "generic" +// MARCHARMV8A-NOT: "-tune-cpu" "generic" + +// RUN: %clang -target aarch64-unknown-unknown -c -### %s -march=armv8-a -mtune=cortex-a75 2>&1 \ +// RUN: | FileCheck %s -check-prefix=MARCHARMV8A-A75 +// MARCHARMV8A-A75: "-target-cpu" "generic" +// MARCHARMV8A-A75: "-tune-cpu" "cortex-a75" + +// Check interaction between mcpu and mtune. + +// RUN: %clang -target aarch64-unknown-unknown -c -### %s -mcpu=thunderx 2>&1 \ +// RUN: | FileCheck %s -check-prefix=MCPUTHUNDERX +// MCPUTHUNDERX: "-target-cpu" "thunderx" +// MCPUTHUNDERX-NOT: "-tune-cpu" + +// RUN: %clang -target aarch64-unknown-unknown -c -### %s -mcpu=cortex-a75 -mtune=cortex-a57 2>&1 \ +// RUN: | FileCheck %s -check-prefix=MCPUA75-MTUNEA57 +// MCPUA75-MTUNEA57: "-target-cpu" "cortex-a75" +// MCPUA75-MTUNEA57: "-tune-cpu" "cortex-a57" diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 4bac29cb1cd1c..ec8a2e4ae882c 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -74,6 +74,10 @@ Changes to the AArch64 Backend ------------------------------ * Added support for the Armv9-A, Armv9.1-A and Armv9.2-A architectures. +* The compiler now recognises the "tune-cpu" function attribute to support +the use of the -mtune frontend flag. This allows certain scheduling features +and optimisations to be enabled independently of the architecture. If the +"tune-cpu" attribute is absent it tunes according to the "target-cpu". Changes to the ARM Backend -------------------------- diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp index 428b6eff85baa..30dd67e619beb 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -50,15 +50,17 @@ static cl::opt<bool> static cl::opt<bool> UseAA("aarch64-use-aa", cl::init(true), cl::desc("Enable the use of AA during codegen.")); -AArch64Subtarget & -AArch64Subtarget::initializeSubtargetDependencies(StringRef FS, - StringRef CPUString) { +AArch64Subtarget &AArch64Subtarget::initializeSubtargetDependencies( + StringRef FS, StringRef CPUString, StringRef TuneCPUString) { // Determine default and user-specified characteristics if (CPUString.empty()) CPUString = "generic"; - ParseSubtargetFeatures(CPUString, /*TuneCPU*/ CPUString, FS); + if (TuneCPUString.empty()) + TuneCPUString = CPUString; + + ParseSubtargetFeatures(CPUString, TuneCPUString, FS); initializeProperties(); return *this; @@ -198,18 +200,20 @@ void AArch64Subtarget::initializeProperties() { } AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU, + const std::string &TuneCPU, const std::string &FS, const TargetMachine &TM, bool LittleEndian, unsigned MinSVEVectorSizeInBitsOverride, unsigned MaxSVEVectorSizeInBitsOverride) - : AArch64GenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), + : AArch64GenSubtargetInfo(TT, CPU, TuneCPU, FS), ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()), CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()), IsLittle(LittleEndian), MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride), MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT), - FrameLowering(), InstrInfo(initializeSubtargetDependencies(FS, CPU)), - TSInfo(), TLInfo(TM, *this) { + FrameLowering(), + InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU)), TSInfo(), + TLInfo(TM, *this) { if (AArch64::isX18ReservedByDefault(TT)) ReserveXRegister.set(18); diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h index d48f52cc8d210..a7cdc1d7a125c 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -298,7 +298,8 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { /// passed in feature string so that we can use initializer lists for /// subtarget initialization. AArch64Subtarget &initializeSubtargetDependencies(StringRef FS, - StringRef CPUString); + StringRef CPUString, + StringRef TuneCPUString); /// Initialize properties based on the selected processor family. void initializeProperties(); @@ -307,8 +308,8 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { /// This constructor initializes the data members to match that /// of the specified triple. AArch64Subtarget(const Triple &TT, const std::string &CPU, - const std::string &FS, const TargetMachine &TM, - bool LittleEndian, + const std::string &TuneCPU, const std::string &FS, + const TargetMachine &TM, bool LittleEndian, unsigned MinSVEVectorSizeInBitsOverride = 0, unsigned MaxSVEVectorSizeInBitsOverride = 0); diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index 4e6016e5487e9..6cfbeaa2fb2f0 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -355,10 +355,13 @@ AArch64TargetMachine::~AArch64TargetMachine() = default; const AArch64Subtarget * AArch64TargetMachine::getSubtargetImpl(const Function &F) const { Attribute CPUAttr = F.getFnAttribute("target-cpu"); + Attribute TuneAttr = F.getFnAttribute("tune-cpu"); Attribute FSAttr = F.getFnAttribute("target-features"); std::string CPU = CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU; + std::string TuneCPU = + TuneAttr.isValid() ? TuneAttr.getValueAsString().str() : CPU; std::string FS = FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS; @@ -399,6 +402,7 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const { Key += "SVEMax"; Key += std::to_string(MaxSVEVectorSize); Key += CPU; + Key += TuneCPU; Key += FS; auto &I = SubtargetMap[Key]; @@ -407,8 +411,8 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const { // creation will depend on the TM and the code generation flags on the // function that reside in TargetOptions. resetTargetOptions(F); - I = std::make_unique<AArch64Subtarget>(TargetTriple, CPU, FS, *this, - isLittle, MinSVEVectorSize, + I = std::make_unique<AArch64Subtarget>(TargetTriple, CPU, TuneCPU, FS, + *this, isLittle, MinSVEVectorSize, MaxSVEVectorSize); } return I.get(); diff --git a/llvm/unittests/Target/AArch64/InstSizes.cpp b/llvm/unittests/Target/AArch64/InstSizes.cpp index 6ff985fe1876e..e8528bb2445d2 100644 --- a/llvm/unittests/Target/AArch64/InstSizes.cpp +++ b/llvm/unittests/Target/AArch64/InstSizes.cpp @@ -29,6 +29,7 @@ std::unique_ptr<LLVMTargetMachine> createTargetMachine() { std::unique_ptr<AArch64InstrInfo> createInstrInfo(TargetMachine *TM) { AArch64Subtarget ST(TM->getTargetTriple(), std::string(TM->getTargetCPU()), + std::string(TM->getTargetCPU()), std::string(TM->getTargetFeatureString()), *TM, /* isLittle */ false); return std::make_unique<AArch64InstrInfo>(ST); diff --git a/llvm/unittests/Target/AArch64/MatrixRegisterAliasing.cpp b/llvm/unittests/Target/AArch64/MatrixRegisterAliasing.cpp index 323848ce44b15..d1693e9b17abd 100644 --- a/llvm/unittests/Target/AArch64/MatrixRegisterAliasing.cpp +++ b/llvm/unittests/Target/AArch64/MatrixRegisterAliasing.cpp @@ -26,6 +26,7 @@ std::unique_ptr<LLVMTargetMachine> createTargetMachine() { std::unique_ptr<AArch64InstrInfo> createInstrInfo(TargetMachine *TM) { AArch64Subtarget ST(TM->getTargetTriple(), std::string(TM->getTargetCPU()), + std::string(TM->getTargetCPU()), std::string(TM->getTargetFeatureString()), *TM, /* isLittle */ false); return std::make_unique<AArch64InstrInfo>(ST); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits