https://github.com/asavonic updated https://github.com/llvm/llvm-project/pull/174995
>From 010d9d26c3e7898ef3499b906b039debdae22d7b Mon Sep 17 00:00:00 2001
From: Andrew Savonichev <[email protected]>
Date: Fri, 19 Dec 2025 16:12:39 +0900
Subject: [PATCH 01/12] [clang][LTO] Emit symbols for global inline assembly as module flags

ModuleSymbolTable used to get symbols from global inline assembly by
running the target asm parser with a generic CPU and no target flags.
This caused problems for top-level inline assembly where instructions
require a target feature.

For example, in global-inline-asm-flags.c we have PACIB and RETAB
instructions that need a +pauth target flag. This test used to fail
with a diagnostic:

    <inline asm>:4:1: error: instruction requires: pauth
        4 | pacib x30, x27
    <inline asm>:5:1: error: instruction requires: pauth
        5 | retab

The patch resolves this problem by moving assembly parsing to clang,
where we have correct CPU and Features and can initialize asm parser
correctly.

Clang now records all symbols and symvers as module flags with
ModFlagBehavior::Append. This ensures that when modules are linked,
these flags are consistent with (merged) inline asm.

This issue was previously discussed in
https://discourse.llvm.org/t/rfc-target-cpu-and-features-for-module-level-inline-assembly/74713,
and in issue #67698 "LTO scan of module-level inline assembly does not
respect CPU".
--- clang/lib/CodeGen/CodeGenModule.cpp | 79 +++++++++ .../CodeGen/AArch64/global-inline-asm-flags.c | 42 +++++ llvm/include/llvm/Object/ModuleSymbolTable.h | 32 +++- llvm/lib/Object/ModuleSymbolTable.cpp | 150 ++++++++++++------ .../AArch64/Inputs/global-inline-asm-flags.ll | 30 ++++ .../LTO/AArch64/global-inline-asm-flags.ll | 68 ++++++++ 6 files changed, 347 insertions(+), 54 deletions(-) create mode 100644 clang/test/CodeGen/AArch64/global-inline-asm-flags.c create mode 100644 llvm/test/LTO/AArch64/Inputs/global-inline-asm-flags.ll create mode 100644 llvm/test/LTO/AArch64/global-inline-asm-flags.ll diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index c7f0997f6e972..93499f8be5e79 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -55,10 +55,13 @@ #include "llvm/IR/AttributeMask.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/ProfileSummary.h" +#include "llvm/Object/ModuleSymbolTable.h" +#include "llvm/Object/SymbolicFile.h" #include "llvm/ProfileData/InstrProfReader.h" #include "llvm/ProfileData/SampleProf.h" #include "llvm/Support/CRC.h" @@ -950,6 +953,76 @@ static bool isStackProtectorOn(const LangOptions &LangOpts, return LangOpts.getStackProtector() == Mode; } +// Emit module flags for symbols and symvers defined in global inline +// assembly. This allows LLVM IR tools to build a symbol table for an +// IR module without knowing exact CPU and Features required to parse +// its global inline assembly. +static void emitGlobalAsmSymbols(llvm::Module &M, StringRef CPU, + StringRef Features) { + llvm::LLVMContext &Ctx = M.getContext(); + bool HaveErrors = false; + + auto DiagHandler = [&](const llvm::DiagnosticInfo &DI) { + // Ignore diagnostics from the assembly parser. 
+ // + // Errors in assembly mean that we cannot build a symbol table + // from it. However, we do not diagnose them here in Clang, + // because we don't know if the Module is ever going to actually + // reach CodeGen where this would matter. + if (DI.getSeverity() == llvm::DS_Error) { + HaveErrors = true; + } + }; + + // Build global-asm-symbols as a list of pairs (name, flags bitmask). + SmallVector<llvm::Metadata *, 16> Symbols; + llvm::ModuleSymbolTable::CollectAsmSymbols( + M, + [&](StringRef Name, llvm::object::BasicSymbolRef::Flags Flags) { + Symbols.push_back(llvm::MDNode::get( + Ctx, {llvm::MDString::get(Ctx, Name), + llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( + llvm::Type::getInt32Ty(Ctx), Flags))})); + }, + DiagHandler, CPU, Features); + + if (Symbols.empty() || HaveErrors) { + return; + } + + M.addModuleFlag(llvm::Module::Append, "global-asm-symbols", + llvm::MDNode::get(Ctx, Symbols)); + + // Build global-asm-symvers as a list of lists (name, followed by all + // aliases). + llvm::MapVector<StringRef, SmallVector<llvm::Metadata *, 2>> SymversMap; + llvm::ModuleSymbolTable::CollectAsmSymvers( + M, + [&](StringRef Name, StringRef Alias) { + auto ItNew = SymversMap.try_emplace(Name); + SmallVector<llvm::Metadata *, 2> &Aliases = ItNew.first->second; + if (ItNew.second) { + // If it is a new list, insert the primary name at the + // front. 
+ Aliases.push_back(llvm::MDString::get(Ctx, Name)); + } + Aliases.push_back(llvm::MDString::get(Ctx, Alias)); + }, + DiagHandler, CPU, Features); + + if (SymversMap.empty() || HaveErrors) { + return; + } + + SmallVector<llvm::Metadata *, 16> Symvers; + for (const auto &KV : SymversMap) { + Symvers.push_back(llvm::MDNode::get(Ctx, KV.second)); + } + + M.addModuleFlag(llvm::Module::Append, "global-asm-symvers", + llvm::MDNode::get(Ctx, Symvers)); +} + void CodeGenModule::Release() { Module *Primary = getContext().getCurrentNamedModule(); if (CXX20ModuleInits && Primary && !Primary->isHeaderLikeModule()) @@ -1566,6 +1639,12 @@ void CodeGenModule::Release() { getModule().addModuleFlag(llvm::Module::Error, "MaxTLSAlign", getContext().getTargetInfo().getMaxTLSAlign()); + // Emit module flags for global inline assembly symbols. + if (!TheModule.getModuleInlineAsm().empty()) { + emitGlobalAsmSymbols(TheModule, getTarget().getTargetOpts().CPU, + llvm::join(getTarget().getTargetOpts().Features, ",")); + } + getTargetCodeGenInfo().emitTargetGlobals(*this); getTargetCodeGenInfo().emitTargetMetadata(*this, MangledDeclNames); diff --git a/clang/test/CodeGen/AArch64/global-inline-asm-flags.c b/clang/test/CodeGen/AArch64/global-inline-asm-flags.c new file mode 100644 index 0000000000000..7d4e82d95e09a --- /dev/null +++ b/clang/test/CodeGen/AArch64/global-inline-asm-flags.c @@ -0,0 +1,42 @@ +// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +pauth -flto=thin -emit-llvm -o - %s | FileCheck %s +// REQUIRES: aarch64-registered-target + +asm ( + ".text" "\n" + ".balign 16" "\n" + ".globl foo\n" + "pacib x30, x27" "\n" + "retab" "\n" + ".symver foo, foo@VER" "\n" + ".symver foo, foo@ANOTHERVER" "\n" + ".globl bar\n" + "pacib x30, x27" "\n" + "retab" "\n" + ".symver bar, bar@VER" "\n" + ".previous" "\n" +); + +// CHECK: module asm ".text" +// CHECK: module asm ".balign 16" +// CHECK: module asm ".globl foo" +// CHECK: module asm "pacib x30, x27" +// CHECK: module asm "retab" 
+// CHECK: module asm ".symver foo, foo@VER" +// CHECK: module asm ".symver foo, foo@ANOTHERVER" +// CHECK: module asm ".globl bar" +// CHECK: module asm "pacib x30, x27" +// CHECK: module asm "retab" +// CHECK: module asm ".symver bar, bar@VER" +// CHECK: module asm ".previous" + +// CHECK: !{{.*}} = !{i32 5, !"global-asm-symbols", ![[SYM:[0-9]+]]} +// CHECK: ![[SYM]] = !{![[SBAR1:[0-9]+]], ![[SBAR2:[0-9]+]], ![[SBAR3:[0-9]+]], ![[SFOO1:[0-9]+]], ![[SFOO2:[0-9]+]]} +// CHECK: ![[SBAR1]] = !{!"bar", i32 2051} +// CHECK: ![[SBAR2]] = !{!"bar@VER", i32 2051} +// CHECK: ![[SBAR3]] = !{!"foo@ANOTHERVER", i32 2051} +// CHECK: ![[SFOO1]] = !{!"foo", i32 2051} +// CHECK: ![[SFOO2]] = !{!"foo@VER", i32 2051} +// CHECK: !{{.*}} = !{i32 5, !"global-asm-symvers", ![[SYMVER:[0-9]+]]} +// CHECK: ![[SYMVER]] = !{![[VFOO:[0-9]+]], ![[VBAR:[0-9]+]]} +// CHECK: ![[VFOO:[0-9]+]] = !{!"foo", !"foo@VER", !"foo@ANOTHERVER"} +// CHECK: ![[VBAR:[0-9]+]] = !{!"bar", !"bar@VER"} diff --git a/llvm/include/llvm/Object/ModuleSymbolTable.h b/llvm/include/llvm/Object/ModuleSymbolTable.h index 564ce76b3feb1..23604e1f9c26b 100644 --- a/llvm/include/llvm/Object/ModuleSymbolTable.h +++ b/llvm/include/llvm/Object/ModuleSymbolTable.h @@ -30,6 +30,7 @@ namespace llvm { class GlobalValue; class Module; +class DiagnosticInfo; class ModuleSymbolTable { public: @@ -45,7 +46,8 @@ class ModuleSymbolTable { public: ArrayRef<Symbol> symbols() const { return SymTab; } - LLVM_ABI void addModule(Module *M); + LLVM_ABI void addModule(Module *M, StringRef CPU = "", + StringRef Features = ""); LLVM_ABI void printSymbolName(raw_ostream &OS, Symbol S) const; LLVM_ABI uint32_t getSymbolFlags(Symbol S) const; @@ -55,18 +57,38 @@ class ModuleSymbolTable { /// /// For each found symbol, call \p AsmSymbol with the name of the symbol found /// and the associated flags. + /// + /// The function attempts to use global-asm-symbols module flag if + /// it is present. 
Otherwise it parses assembly with the provided \p + /// CPU and \p Features and calls \p DiagHandler for any + /// diagnostics. + /// + /// If \p DiagHandler is not provided, the function calls + /// LLVMContext::diagnose() instead. LLVM_ABI static void CollectAsmSymbols( const Module &M, - function_ref<void(StringRef, object::BasicSymbolRef::Flags)> AsmSymbol); + function_ref<void(StringRef, object::BasicSymbolRef::Flags)> AsmSymbol, + function_ref<void(const DiagnosticInfo &DI)> DiagHandler = nullptr, + StringRef CPU = "", StringRef Features = ""); /// Parse inline ASM and collect the symvers directives that are defined in /// the current module. /// /// For each found symbol, call \p AsmSymver with the name of the symbol and /// its alias. - LLVM_ABI static void - CollectAsmSymvers(const Module &M, - function_ref<void(StringRef, StringRef)> AsmSymver); + /// + /// The function attempts to use global-asm-symvers module flag if + /// it is present. Otherwise it parses assembly with the provided \p + /// CPU and \p Features and calls \p DiagHandler for any + /// diagnostics. + /// + /// If \p DiagHandler is not provided, the function calls + /// LLVMContext::diagnose() instead. 
+ + LLVM_ABI static void CollectAsmSymvers( + const Module &M, function_ref<void(StringRef, StringRef)> AsmSymver, + function_ref<void(const DiagnosticInfo &DI)> DiagHandler = nullptr, + StringRef CPU = "", StringRef Features = ""); }; } // end namespace llvm diff --git a/llvm/lib/Object/ModuleSymbolTable.cpp b/llvm/lib/Object/ModuleSymbolTable.cpp index 9442becdb7d33..7bb6e22a7ca8a 100644 --- a/llvm/lib/Object/ModuleSymbolTable.cpp +++ b/llvm/lib/Object/ModuleSymbolTable.cpp @@ -15,6 +15,7 @@ #include "llvm/Object/ModuleSymbolTable.h" #include "RecordStreamer.h" #include "llvm/ADT/StringRef.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalAlias.h" @@ -49,7 +50,8 @@ using namespace llvm; using namespace object; -void ModuleSymbolTable::addModule(Module *M) { +void ModuleSymbolTable::addModule(Module *M, StringRef CPU, + StringRef Features) { if (FirstMod) assert(FirstMod->getTargetTriple() == M->getTargetTriple()); else @@ -58,15 +60,19 @@ void ModuleSymbolTable::addModule(Module *M) { for (GlobalValue &GV : M->global_values()) SymTab.push_back(&GV); - CollectAsmSymbols(*M, [this](StringRef Name, BasicSymbolRef::Flags Flags) { - SymTab.push_back(new (AsmSymbols.Allocate()) - AsmSymbol(std::string(Name), Flags)); - }); + CollectAsmSymbols( + *M, + [this](StringRef Name, BasicSymbolRef::Flags Flags) { + SymTab.push_back(new (AsmSymbols.Allocate()) + AsmSymbol(std::string(Name), Flags)); + }, + /*DiagHandler=*/nullptr, CPU, Features); } -static void -initializeRecordStreamer(const Module &M, - function_ref<void(RecordStreamer &)> Init) { +static void initializeRecordStreamer( + const Module &M, StringRef CPU, StringRef Features, + function_ref<void(RecordStreamer &)> Init, + function_ref<void(const DiagnosticInfo &DI)> DiagHandler) { // This function may be called twice, once for ModuleSummaryIndexAnalysis and // the other when writing the IR symbol table. 
If parsing inline assembly has // caused errors in the first run, suppress the second run. @@ -90,7 +96,8 @@ initializeRecordStreamer(const Module &M, if (!MAI) return; - std::unique_ptr<MCSubtargetInfo> STI(T->createMCSubtargetInfo(TT, "", "")); + std::unique_ptr<MCSubtargetInfo> STI( + T->createMCSubtargetInfo(TT, CPU, Features)); if (!STI) return; @@ -121,8 +128,12 @@ initializeRecordStreamer(const Module &M, MCCtx.setDiagnosticHandler([&](const SMDiagnostic &SMD, bool IsInlineAsm, const SourceMgr &SrcMgr, std::vector<const MDNode *> &LocInfos) { - M.getContext().diagnose( - DiagnosticInfoSrcMgr(SMD, M.getName(), IsInlineAsm, /*LocCookie=*/0)); + DiagnosticInfoSrcMgr Diag(SMD, M.getName(), IsInlineAsm, /*LocCookie=*/0); + if (DiagHandler) { + DiagHandler(Diag); + return; + } + M.getContext().diagnose(Diag); }); // Module-level inline asm is assumed to use At&t syntax (see @@ -138,39 +149,60 @@ initializeRecordStreamer(const Module &M, void ModuleSymbolTable::CollectAsmSymbols( const Module &M, - function_ref<void(StringRef, BasicSymbolRef::Flags)> AsmSymbol) { - initializeRecordStreamer(M, [&](RecordStreamer &Streamer) { - Streamer.flushSymverDirectives(); - - for (auto &KV : Streamer) { - StringRef Key = KV.first(); - RecordStreamer::State Value = KV.second; - // FIXME: For now we just assume that all asm symbols are executable. 
- uint32_t Res = BasicSymbolRef::SF_Executable; - switch (Value) { - case RecordStreamer::NeverSeen: - llvm_unreachable("NeverSeen should have been replaced earlier"); - case RecordStreamer::DefinedGlobal: - Res |= BasicSymbolRef::SF_Global; - break; - case RecordStreamer::Defined: - break; - case RecordStreamer::Global: - case RecordStreamer::Used: - Res |= BasicSymbolRef::SF_Undefined; - Res |= BasicSymbolRef::SF_Global; - break; - case RecordStreamer::DefinedWeak: - Res |= BasicSymbolRef::SF_Weak; - Res |= BasicSymbolRef::SF_Global; - break; - case RecordStreamer::UndefinedWeak: - Res |= BasicSymbolRef::SF_Weak; - Res |= BasicSymbolRef::SF_Undefined; - } - AsmSymbol(Key, BasicSymbolRef::Flags(Res)); + function_ref<void(StringRef, BasicSymbolRef::Flags)> AsmSymbol, + function_ref<void(const DiagnosticInfo &DI)> DiagHandler, StringRef CPU, + StringRef Features) { + + MDTuple *SymbolsMD = + dyn_cast_if_present<MDTuple>(M.getModuleFlag("global-asm-symbols")); + + if (SymbolsMD) { + for (const Metadata *MD : SymbolsMD->operands()) { + const MDTuple *SymMD = cast<MDTuple>(MD); + const MDString *Name = cast<MDString>(SymMD->getOperand(0)); + const ConstantInt *Flags = + mdconst::extract<ConstantInt>(SymMD->getOperand(1)); + AsmSymbol(Name->getString(), + static_cast<BasicSymbolRef::Flags>(Flags->getZExtValue())); } - }); + return; + } + + initializeRecordStreamer( + M, CPU, Features, + [&](RecordStreamer &Streamer) { + Streamer.flushSymverDirectives(); + + for (auto &KV : Streamer) { + StringRef Key = KV.first(); + RecordStreamer::State Value = KV.second; + // FIXME: For now we just assume that all asm symbols are executable. 
+ uint32_t Res = BasicSymbolRef::SF_Executable; + switch (Value) { + case RecordStreamer::NeverSeen: + llvm_unreachable("NeverSeen should have been replaced earlier"); + case RecordStreamer::DefinedGlobal: + Res |= BasicSymbolRef::SF_Global; + break; + case RecordStreamer::Defined: + break; + case RecordStreamer::Global: + case RecordStreamer::Used: + Res |= BasicSymbolRef::SF_Undefined; + Res |= BasicSymbolRef::SF_Global; + break; + case RecordStreamer::DefinedWeak: + Res |= BasicSymbolRef::SF_Weak; + Res |= BasicSymbolRef::SF_Global; + break; + case RecordStreamer::UndefinedWeak: + Res |= BasicSymbolRef::SF_Weak; + Res |= BasicSymbolRef::SF_Undefined; + } + AsmSymbol(Key, BasicSymbolRef::Flags(Res)); + } + }, + DiagHandler); // In ELF, object code generated for x86-32 and some code models of x86-64 may // reference the special symbol _GLOBAL_OFFSET_TABLE_ that is not used in the @@ -188,12 +220,32 @@ void ModuleSymbolTable::CollectAsmSymbols( } void ModuleSymbolTable::CollectAsmSymvers( - const Module &M, function_ref<void(StringRef, StringRef)> AsmSymver) { - initializeRecordStreamer(M, [&](RecordStreamer &Streamer) { - for (auto &KV : Streamer.symverAliases()) - for (auto &Alias : KV.second) - AsmSymver(KV.first->getName(), Alias); - }); + const Module &M, function_ref<void(StringRef, StringRef)> AsmSymver, + function_ref<void(const DiagnosticInfo &DI)> DiagHandler, StringRef CPU, + StringRef Features) { + + MDTuple *SymversMD = + dyn_cast_if_present<MDTuple>(M.getModuleFlag("global-asm-symvers")); + + if (SymversMD) { + for (const Metadata *MD : SymversMD->operands()) { + const MDTuple *SymverMD = cast<MDTuple>(MD); + StringRef Name = cast<MDString>(SymverMD->getOperand(0))->getString(); + for (unsigned i = 1; i < SymverMD->getNumOperands(); ++i) { + AsmSymver(Name, cast<MDString>(SymverMD->getOperand(i))->getString()); + } + } + return; + } + + initializeRecordStreamer( + M, CPU, Features, + [&](RecordStreamer &Streamer) { + for (auto &KV : 
Streamer.symverAliases()) + for (auto &Alias : KV.second) + AsmSymver(KV.first->getName(), Alias); + }, + DiagHandler); } void ModuleSymbolTable::printSymbolName(raw_ostream &OS, Symbol S) const { diff --git a/llvm/test/LTO/AArch64/Inputs/global-inline-asm-flags.ll b/llvm/test/LTO/AArch64/Inputs/global-inline-asm-flags.ll new file mode 100644 index 0000000000000..349d8e37fc488 --- /dev/null +++ b/llvm/test/LTO/AArch64/Inputs/global-inline-asm-flags.ll @@ -0,0 +1,30 @@ +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32" +target triple = "aarch64-none-linux-gnu" + +module asm ".text" +module asm ".balign 16" +module asm ".globl foo" +module asm "pacib x30, x27" +module asm "retab" +module asm ".symver foo, foo@VER" +module asm ".symver foo, foo@ANOTHERVER" +module asm ".globl bar" +module asm "pacib x30, x27" +module asm "retab" +module asm ".symver bar, bar@VER" +module asm ".previous" + +!llvm.module.flags = !{!1, !8} + +!1 = !{i32 5, !"global-asm-symbols", !2} +!2 = !{!3, !4, !5, !6, !7} +!3 = !{!"bar", i32 2051} +!4 = !{!"bar@VER", i32 2051} +!5 = !{!"foo@ANOTHERVER", i32 2051} +!6 = !{!"foo", i32 2051} +!7 = !{!"foo@VER", i32 2051} +!8 = !{i32 5, !"global-asm-symvers", !9} +!9 = !{!10, !11} +!10 = !{!"foo", !"foo@VER", !"foo@ANOTHERVER"} +!11 = !{!"bar", !"bar@VER"} + diff --git a/llvm/test/LTO/AArch64/global-inline-asm-flags.ll b/llvm/test/LTO/AArch64/global-inline-asm-flags.ll new file mode 100644 index 0000000000000..173cc014c8852 --- /dev/null +++ b/llvm/test/LTO/AArch64/global-inline-asm-flags.ll @@ -0,0 +1,68 @@ +; RUN: llvm-as %s -o %t1.bc +; RUN: llvm-as %p/Inputs/global-inline-asm-flags.ll -o %t2.bc +; RUN: llvm-lto -save-merged-module -filetype=asm -mattr=+pauth %t1.bc %t2.bc -o %t3 +; RUN: llvm-dis %t3.merged.bc -o - | FileCheck %s + +; Note that -mattr=+pauth for llvm-lto is still required, because it +; runs full CodeGen at the end. Symbols and Symvers are still +; extracted from metadata. 
+ +; CHECK: module asm ".text" +; CHECK: module asm ".balign 16" +; CHECK: module asm ".globl baz" +; CHECK: module asm "pacib x30, x27" +; CHECK: module asm "retab" +; CHECK: module asm ".symver baz, baz@VER" +; CHECK: module asm ".symver foo, foo@LINKEDVER" +; CHECK: module asm ".previous" +; CHECK: module asm ".text" +; CHECK: module asm ".balign 16" +; CHECK: module asm ".globl foo" +; CHECK: module asm "pacib x30, x27" +; CHECK: module asm "retab" +; CHECK: module asm ".symver foo, foo@VER" +; CHECK: module asm ".symver foo, foo@ANOTHERVER" +; CHECK: module asm ".globl bar" +; CHECK: module asm "pacib x30, x27" +; CHECK: module asm "retab" +; CHECK: module asm ".symver bar, bar@VER" +; CHECK: module asm ".previous" + +; CHECK: !{{[0-9]+}} = distinct !{i32 5, !"global-asm-symbols", ![[SYM:[0-9]+]]} +; CHECK: ![[SYM]] = distinct !{![[SBAZ1:[0-9]+]], ![[SBAZ2:[0-9]+]], ![[SFOO1:[0-9]+]], ![[SBAR1:[0-9]+]], ![[SBAR2:[0-9]+]], ![[SFOO2:[0-9]+]], ![[SFOO3:[0-9]+]], ![[SFOO4:[0-9]+]]} +; CHECK: ![[SBAZ1]] = !{!"baz", i32 2051} +; CHECK: ![[SBAZ2]] = !{!"baz@VER", i32 2051} +; CHECK: ![[SFOO1]] = !{!"foo@LINKEDVER", i32 2051} +; CHECK: ![[SBAR1]] = !{!"bar", i32 2051} +; CHECK: ![[SBAR2]] = !{!"bar@VER", i32 2051} +; CHECK: ![[SFOO2]] = !{!"foo@ANOTHERVER", i32 2051} +; CHECK: ![[SFOO3]] = !{!"foo", i32 2051} +; CHECK: ![[SFOO4]] = !{!"foo@VER", i32 2051} + +; CHECK: !{{[0-9]+}} = distinct !{i32 5, !"global-asm-symvers", ![[SYMVER:[0-9]+]]} +; CHECK: ![[SYMVER]] = distinct !{![[VBAZ:[0-9]+]], ![[VFOO1:[0-9]+]], ![[VFOO2:[0-9]+]], ![[VBAR:[0-9]+]]} +; CHECK: ![[VBAZ]] = !{!"baz", !"baz@VER"} +; CHECK: ![[VFOO1]] = !{!"foo", !"foo@LINKEDVER"} +; CHECK: ![[VFOO2]] = !{!"foo", !"foo@VER", !"foo@ANOTHERVER"} +; CHECK: ![[VBAR]] = !{!"bar", !"bar@VER"} + +module asm ".text" +module asm ".balign 16" +module asm ".globl baz" +module asm "pacib x30, x27" +module asm "retab" +module asm ".symver baz, baz@VER" +module asm ".symver foo, foo@LINKEDVER" +module asm ".previous" + 
+!llvm.module.flags = !{!0, !5} + +!0 = !{i32 5, !"global-asm-symbols", !1} +!1 = !{!2, !3, !4} +!2 = !{!"baz", i32 2051} +!3 = !{!"baz@VER", i32 2051} +!4 = !{!"foo@LINKEDVER", i32 2051} +!5 = !{i32 5, !"global-asm-symvers", !6} +!6 = !{!7, !8} +!7 = !{!"baz", !"baz@VER"} +!8 = !{!"foo", !"foo@LINKEDVER"} >From 62c5b2272944e39ad051361d7764dbc496a79a3c Mon Sep 17 00:00:00 2001 From: Andrew Savonichev <[email protected]> Date: Wed, 21 Jan 2026 21:26:43 +0900 Subject: [PATCH 02/12] Move metadata emission to ModuleSymbolTable --- clang/lib/CodeGen/CodeGenModule.cpp | 75 +------- llvm/include/llvm/Object/ModuleSymbolTable.h | 7 + llvm/lib/Object/ModuleSymbolTable.cpp | 188 ++++++++++++++----- 3 files changed, 150 insertions(+), 120 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 93499f8be5e79..86eaec3da12d3 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -953,76 +953,6 @@ static bool isStackProtectorOn(const LangOptions &LangOpts, return LangOpts.getStackProtector() == Mode; } -// Emit module flags for symbols and symvers defined in global inline -// assembly. This allows LLVM IR tools to build a symbol table for an -// IR module without knowing exact CPU and Features required to parse -// its global inline assembly. -static void emitGlobalAsmSymbols(llvm::Module &M, StringRef CPU, - StringRef Features) { - llvm::LLVMContext &Ctx = M.getContext(); - bool HaveErrors = false; - - auto DiagHandler = [&](const llvm::DiagnosticInfo &DI) { - // Ignore diagnostics from the assembly parser. - // - // Errors in assembly mean that we cannot build a symbol table - // from it. However, we do not diagnose them here in Clang, - // because we don't know if the Module is ever going to actually - // reach CodeGen where this would matter. - if (DI.getSeverity() == llvm::DS_Error) { - HaveErrors = true; - } - }; - - // Build global-asm-symbols as a list of pairs (name, flags bitmask). 
- SmallVector<llvm::Metadata *, 16> Symbols; - llvm::ModuleSymbolTable::CollectAsmSymbols( - M, - [&](StringRef Name, llvm::object::BasicSymbolRef::Flags Flags) { - Symbols.push_back(llvm::MDNode::get( - Ctx, {llvm::MDString::get(Ctx, Name), - llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( - llvm::Type::getInt32Ty(Ctx), Flags))})); - }, - DiagHandler, CPU, Features); - - if (Symbols.empty() || HaveErrors) { - return; - } - - M.addModuleFlag(llvm::Module::Append, "global-asm-symbols", - llvm::MDNode::get(Ctx, Symbols)); - - // Build global-asm-symvers as a list of lists (name, followed by all - // aliases). - llvm::MapVector<StringRef, SmallVector<llvm::Metadata *, 2>> SymversMap; - llvm::ModuleSymbolTable::CollectAsmSymvers( - M, - [&](StringRef Name, StringRef Alias) { - auto ItNew = SymversMap.try_emplace(Name); - SmallVector<llvm::Metadata *, 2> &Aliases = ItNew.first->second; - if (ItNew.second) { - // If it is a new list, insert the primary name at the - // front. - Aliases.push_back(llvm::MDString::get(Ctx, Name)); - } - Aliases.push_back(llvm::MDString::get(Ctx, Alias)); - }, - DiagHandler, CPU, Features); - - if (SymversMap.empty() || HaveErrors) { - return; - } - - SmallVector<llvm::Metadata *, 16> Symvers; - for (const auto &KV : SymversMap) { - Symvers.push_back(llvm::MDNode::get(Ctx, KV.second)); - } - - M.addModuleFlag(llvm::Module::Append, "global-asm-symvers", - llvm::MDNode::get(Ctx, Symvers)); -} - void CodeGenModule::Release() { Module *Primary = getContext().getCurrentNamedModule(); if (CXX20ModuleInits && Primary && !Primary->isHeaderLikeModule()) @@ -1641,8 +1571,9 @@ void CodeGenModule::Release() { // Emit module flags for global inline assembly symbols. 
if (!TheModule.getModuleInlineAsm().empty()) { - emitGlobalAsmSymbols(TheModule, getTarget().getTargetOpts().CPU, - llvm::join(getTarget().getTargetOpts().Features, ",")); + llvm::ModuleSymbolTable::EmitModuleFlags( + TheModule, getTarget().getTargetOpts().CPU, + llvm::join(getTarget().getTargetOpts().Features, ",")); } getTargetCodeGenInfo().emitTargetGlobals(*this); diff --git a/llvm/include/llvm/Object/ModuleSymbolTable.h b/llvm/include/llvm/Object/ModuleSymbolTable.h index 23604e1f9c26b..cd14510304dce 100644 --- a/llvm/include/llvm/Object/ModuleSymbolTable.h +++ b/llvm/include/llvm/Object/ModuleSymbolTable.h @@ -89,6 +89,13 @@ class ModuleSymbolTable { const Module &M, function_ref<void(StringRef, StringRef)> AsmSymver, function_ref<void(const DiagnosticInfo &DI)> DiagHandler = nullptr, StringRef CPU = "", StringRef Features = ""); + + /// Emit module flags for symbols and symvers defined in global inline + /// assembly. This allows LLVM IR tools to build a symbol table for an + /// IR module without knowing exact CPU and Features required to parse + /// its global inline assembly. + LLVM_ABI static bool EmitModuleFlags(Module &M, StringRef CPU = "", + StringRef Features = ""); }; } // end namespace llvm diff --git a/llvm/lib/Object/ModuleSymbolTable.cpp b/llvm/lib/Object/ModuleSymbolTable.cpp index 7bb6e22a7ca8a..fe6b14441dcad 100644 --- a/llvm/lib/Object/ModuleSymbolTable.cpp +++ b/llvm/lib/Object/ModuleSymbolTable.cpp @@ -147,6 +147,59 @@ static void initializeRecordStreamer( Init(Streamer); } +static void addSpecialSymbols( + const Module &M, + function_ref<void(StringRef, BasicSymbolRef::Flags)> AsmSymbol) { + // In ELF, object code generated for x86-32 and some code models of x86-64 may + // reference the special symbol _GLOBAL_OFFSET_TABLE_ that is not used in the + // IR. Record it like inline asm symbols. 
+ Triple TT(M.getTargetTriple()); + if (!TT.isOSBinFormatELF() || !TT.isX86()) + return; + auto CM = M.getCodeModel(); + if (TT.getArch() == Triple::x86 || CM == CodeModel::Medium || + CM == CodeModel::Large) { + AsmSymbol("_GLOBAL_OFFSET_TABLE_", + BasicSymbolRef::Flags(BasicSymbolRef::SF_Undefined | + BasicSymbolRef::SF_Global)); + } +} + +static void +addSymbols(RecordStreamer &Streamer, + function_ref<void(StringRef, BasicSymbolRef::Flags)> AsmSymbol) { + Streamer.flushSymverDirectives(); + + for (auto &KV : Streamer) { + StringRef Key = KV.first(); + RecordStreamer::State Value = KV.second; + // FIXME: For now we just assume that all asm symbols are executable. + uint32_t Res = BasicSymbolRef::SF_Executable; + switch (Value) { + case RecordStreamer::NeverSeen: + llvm_unreachable("NeverSeen should have been replaced earlier"); + case RecordStreamer::DefinedGlobal: + Res |= BasicSymbolRef::SF_Global; + break; + case RecordStreamer::Defined: + break; + case RecordStreamer::Global: + case RecordStreamer::Used: + Res |= BasicSymbolRef::SF_Undefined; + Res |= BasicSymbolRef::SF_Global; + break; + case RecordStreamer::DefinedWeak: + Res |= BasicSymbolRef::SF_Weak; + Res |= BasicSymbolRef::SF_Global; + break; + case RecordStreamer::UndefinedWeak: + Res |= BasicSymbolRef::SF_Weak; + Res |= BasicSymbolRef::SF_Undefined; + } + AsmSymbol(Key, BasicSymbolRef::Flags(Res)); + } +} + void ModuleSymbolTable::CollectAsmSymbols( const Module &M, function_ref<void(StringRef, BasicSymbolRef::Flags)> AsmSymbol, @@ -170,53 +223,17 @@ void ModuleSymbolTable::CollectAsmSymbols( initializeRecordStreamer( M, CPU, Features, - [&](RecordStreamer &Streamer) { - Streamer.flushSymverDirectives(); - - for (auto &KV : Streamer) { - StringRef Key = KV.first(); - RecordStreamer::State Value = KV.second; - // FIXME: For now we just assume that all asm symbols are executable. 
- uint32_t Res = BasicSymbolRef::SF_Executable; - switch (Value) { - case RecordStreamer::NeverSeen: - llvm_unreachable("NeverSeen should have been replaced earlier"); - case RecordStreamer::DefinedGlobal: - Res |= BasicSymbolRef::SF_Global; - break; - case RecordStreamer::Defined: - break; - case RecordStreamer::Global: - case RecordStreamer::Used: - Res |= BasicSymbolRef::SF_Undefined; - Res |= BasicSymbolRef::SF_Global; - break; - case RecordStreamer::DefinedWeak: - Res |= BasicSymbolRef::SF_Weak; - Res |= BasicSymbolRef::SF_Global; - break; - case RecordStreamer::UndefinedWeak: - Res |= BasicSymbolRef::SF_Weak; - Res |= BasicSymbolRef::SF_Undefined; - } - AsmSymbol(Key, BasicSymbolRef::Flags(Res)); - } - }, + [&](RecordStreamer &Streamer) { addSymbols(Streamer, AsmSymbol); }, DiagHandler); - // In ELF, object code generated for x86-32 and some code models of x86-64 may - // reference the special symbol _GLOBAL_OFFSET_TABLE_ that is not used in the - // IR. Record it like inline asm symbols. 
- Triple TT(M.getTargetTriple()); - if (!TT.isOSBinFormatELF() || !TT.isX86()) - return; - auto CM = M.getCodeModel(); - if (TT.getArch() == Triple::x86 || CM == CodeModel::Medium || - CM == CodeModel::Large) { - AsmSymbol("_GLOBAL_OFFSET_TABLE_", - BasicSymbolRef::Flags(BasicSymbolRef::SF_Undefined | - BasicSymbolRef::SF_Global)); - } + addSpecialSymbols(M, AsmSymbol); +} + +static void addSymvers(RecordStreamer &Streamer, + function_ref<void(StringRef, StringRef)> AsmSymver) { + for (auto &KV : Streamer.symverAliases()) + for (auto &Alias : KV.second) + AsmSymver(KV.first->getName(), Alias); } void ModuleSymbolTable::CollectAsmSymvers( @@ -238,14 +255,89 @@ void ModuleSymbolTable::CollectAsmSymvers( return; } + initializeRecordStreamer( + M, CPU, Features, + [&](RecordStreamer &Streamer) { addSymvers(Streamer, AsmSymver); }, + DiagHandler); +} + +bool ModuleSymbolTable::EmitModuleFlags(Module &M, StringRef CPU, + StringRef Features) { + bool Changed = false; + llvm::LLVMContext &Ctx = M.getContext(); + + bool HaveErrors = false; + auto DiagHandler = [&](const llvm::DiagnosticInfo &DI) { + // Ignore diagnostics from the assembly parser. + // + // Errors in assembly mean that we cannot build a symbol table + // from it. However, we do not diagnose them here in Clang, + // because we don't know if the Module is ever going to actually + // reach CodeGen where this would matter. + if (DI.getSeverity() == llvm::DS_Error) { + HaveErrors = true; + } + }; + + // Build global-asm-symbols as a list of pairs (name, flags bitmask). + SmallVector<llvm::Metadata *, 16> Symbols; + + auto AsmSymbol = [&](StringRef Name, + llvm::object::BasicSymbolRef::Flags Flags) { + Symbols.push_back(llvm::MDNode::get( + Ctx, {llvm::MDString::get(Ctx, Name), + llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( + llvm::Type::getInt32Ty(Ctx), Flags))})); + }; + + // Build global-asm-symvers as a list of lists (name, followed by all + // aliases). 
+ llvm::MapVector<StringRef, SmallVector<llvm::Metadata *, 2>> SymversMap; + + auto AsmSymver = [&](StringRef Name, StringRef Alias) { + auto ItNew = SymversMap.try_emplace(Name); + SmallVector<llvm::Metadata *, 2> &Aliases = ItNew.first->second; + if (ItNew.second) { + // If it is a new list, insert the primary name at the + // front. + Aliases.push_back(llvm::MDString::get(Ctx, Name)); + } + Aliases.push_back(llvm::MDString::get(Ctx, Alias)); + }; + + // Parse global inline assembly and collect all symbols and symvers initializeRecordStreamer( M, CPU, Features, [&](RecordStreamer &Streamer) { - for (auto &KV : Streamer.symverAliases()) - for (auto &Alias : KV.second) - AsmSymver(KV.first->getName(), Alias); + addSymvers(Streamer, AsmSymver); + addSymbols(Streamer, AsmSymbol); }, DiagHandler); + + if (HaveErrors) { + return false; + } + + addSpecialSymbols(M, AsmSymbol); + + if (!Symbols.empty()) { + M.addModuleFlag(llvm::Module::Append, "global-asm-symbols", + llvm::MDNode::get(Ctx, Symbols)); + Changed = true; + } + + if (!SymversMap.empty()) { + SmallVector<llvm::Metadata *, 16> Symvers; + for (const auto &KV : SymversMap) { + Symvers.push_back(llvm::MDNode::get(Ctx, KV.second)); + } + + M.addModuleFlag(llvm::Module::Append, "global-asm-symvers", + llvm::MDNode::get(Ctx, Symvers)); + Changed = true; + } + + return Changed; } void ModuleSymbolTable::printSymbolName(raw_ostream &OS, Symbol S) const { >From 8cbedc938d7d4a50ad5243c7cbed2d5b77e7690a Mon Sep 17 00:00:00 2001 From: Andrew Savonichev <[email protected]> Date: Wed, 21 Jan 2026 21:42:13 +0900 Subject: [PATCH 03/12] Address code review comments --- llvm/lib/Object/ModuleSymbolTable.cpp | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Object/ModuleSymbolTable.cpp b/llvm/lib/Object/ModuleSymbolTable.cpp index fe6b14441dcad..3dc606045358b 100644 --- a/llvm/lib/Object/ModuleSymbolTable.cpp +++ b/llvm/lib/Object/ModuleSymbolTable.cpp @@ -170,12 +170,10 @@ 
addSymbols(RecordStreamer &Streamer, function_ref<void(StringRef, BasicSymbolRef::Flags)> AsmSymbol) { Streamer.flushSymverDirectives(); - for (auto &KV : Streamer) { - StringRef Key = KV.first(); - RecordStreamer::State Value = KV.second; + for (const auto &[Name, State] : Streamer) { // FIXME: For now we just assume that all asm symbols are executable. uint32_t Res = BasicSymbolRef::SF_Executable; - switch (Value) { + switch (State) { case RecordStreamer::NeverSeen: llvm_unreachable("NeverSeen should have been replaced earlier"); case RecordStreamer::DefinedGlobal: @@ -196,7 +194,7 @@ addSymbols(RecordStreamer &Streamer, Res |= BasicSymbolRef::SF_Weak; Res |= BasicSymbolRef::SF_Undefined; } - AsmSymbol(Key, BasicSymbolRef::Flags(Res)); + AsmSymbol(Name, BasicSymbolRef::Flags(Res)); } } @@ -231,9 +229,9 @@ void ModuleSymbolTable::CollectAsmSymbols( static void addSymvers(RecordStreamer &Streamer, function_ref<void(StringRef, StringRef)> AsmSymver) { - for (auto &KV : Streamer.symverAliases()) - for (auto &Alias : KV.second) - AsmSymver(KV.first->getName(), Alias); + for (const auto &[Name, Aliases] : Streamer.symverAliases()) + for (StringRef Alias : Aliases) + AsmSymver(Name->getName(), Alias); } void ModuleSymbolTable::CollectAsmSymvers( @@ -248,7 +246,7 @@ void ModuleSymbolTable::CollectAsmSymvers( for (const Metadata *MD : SymversMD->operands()) { const MDTuple *SymverMD = cast<MDTuple>(MD); StringRef Name = cast<MDString>(SymverMD->getOperand(0))->getString(); - for (unsigned i = 1; i < SymverMD->getNumOperands(); ++i) { + for (size_t i = 1, End = SymverMD->getNumOperands(); i < End; ++i) { AsmSymver(Name, cast<MDString>(SymverMD->getOperand(i))->getString()); } } @@ -274,9 +272,8 @@ bool ModuleSymbolTable::EmitModuleFlags(Module &M, StringRef CPU, // from it. However, we do not diagnose them here in Clang, // because we don't know if the Module is ever going to actually // reach CodeGen where this would matter. 
- if (DI.getSeverity() == llvm::DS_Error) { + if (DI.getSeverity() == llvm::DS_Error) HaveErrors = true; - } }; // Build global-asm-symbols as a list of pairs (name, flags bitmask). @@ -328,6 +325,7 @@ bool ModuleSymbolTable::EmitModuleFlags(Module &M, StringRef CPU, if (!SymversMap.empty()) { SmallVector<llvm::Metadata *, 16> Symvers; + Symvers.reserve(SymversMap.size()); for (const auto &KV : SymversMap) { Symvers.push_back(llvm::MDNode::get(Ctx, KV.second)); } >From fcabc8b1312c1b30a6b642b0c58c61f47398a487 Mon Sep 17 00:00:00 2001 From: Andrew Savonichev <[email protected]> Date: Wed, 21 Jan 2026 22:31:33 +0900 Subject: [PATCH 04/12] Keep the original interface of CollectAsmSymbols and CollectAsmSymvers --- llvm/include/llvm/Object/ModuleSymbolTable.h | 46 ++++++----------- llvm/lib/Object/ModuleSymbolTable.cpp | 53 +++++++++----------- 2 files changed, 37 insertions(+), 62 deletions(-) diff --git a/llvm/include/llvm/Object/ModuleSymbolTable.h b/llvm/include/llvm/Object/ModuleSymbolTable.h index cd14510304dce..9b079ff7fb1c3 100644 --- a/llvm/include/llvm/Object/ModuleSymbolTable.h +++ b/llvm/include/llvm/Object/ModuleSymbolTable.h @@ -30,7 +30,6 @@ namespace llvm { class GlobalValue; class Module; -class DiagnosticInfo; class ModuleSymbolTable { public: @@ -46,8 +45,7 @@ class ModuleSymbolTable { public: ArrayRef<Symbol> symbols() const { return SymTab; } - LLVM_ABI void addModule(Module *M, StringRef CPU = "", - StringRef Features = ""); + LLVM_ABI void addModule(Module *M); LLVM_ABI void printSymbolName(raw_ostream &OS, Symbol S) const; LLVM_ABI uint32_t getSymbolFlags(Symbol S) const; @@ -57,45 +55,29 @@ class ModuleSymbolTable { /// /// For each found symbol, call \p AsmSymbol with the name of the symbol found /// and the associated flags. - /// - /// The function attempts to use global-asm-symbols module flag if - /// it is present. 
Otherwise it parses assembly with the provided \p - /// CPU and \p Features and calls \p DiagHandler for any - /// diagnostics. - /// - /// If \p DiagHandler is not provided, the function calls - /// LLVMContext::diagnose() instead. LLVM_ABI static void CollectAsmSymbols( const Module &M, - function_ref<void(StringRef, object::BasicSymbolRef::Flags)> AsmSymbol, - function_ref<void(const DiagnosticInfo &DI)> DiagHandler = nullptr, - StringRef CPU = "", StringRef Features = ""); + function_ref<void(StringRef, object::BasicSymbolRef::Flags)> AsmSymbol); /// Parse inline ASM and collect the symvers directives that are defined in /// the current module. /// /// For each found symbol, call \p AsmSymver with the name of the symbol and /// its alias. - /// - /// The function attempts to use global-asm-symvers module flag if - /// it is present. Otherwise it parses assembly with the provided \p - /// CPU and \p Features and calls \p DiagHandler for any - /// diagnostics. - /// - /// If \p DiagHandler is not provided, the function calls - /// LLVMContext::diagnose() instead. - - LLVM_ABI static void CollectAsmSymvers( - const Module &M, function_ref<void(StringRef, StringRef)> AsmSymver, - function_ref<void(const DiagnosticInfo &DI)> DiagHandler = nullptr, - StringRef CPU = "", StringRef Features = ""); + LLVM_ABI static void + CollectAsmSymvers(const Module &M, + function_ref<void(StringRef, StringRef)> AsmSymver); - /// Emit module flags for symbols and symvers defined in global inline - /// assembly. This allows LLVM IR tools to build a symbol table for an - /// IR module without knowing exact CPU and Features required to parse + /// Emit module flags for symbols and symvers defined in global + /// inline assembly. If these flags are present, CollectAsmSymbols + /// and CollectAsmSymvers will use them instead of trying to parse + /// assembly again. 
+ /// + /// This allows LLVM IR tools to build a symbol table for an IR + /// module without knowing exact CPU and Features required to parse /// its global inline assembly. - LLVM_ABI static bool EmitModuleFlags(Module &M, StringRef CPU = "", - StringRef Features = ""); + LLVM_ABI static bool EmitModuleFlags(Module &M, StringRef CPU, + StringRef Features); }; } // end namespace llvm diff --git a/llvm/lib/Object/ModuleSymbolTable.cpp b/llvm/lib/Object/ModuleSymbolTable.cpp index 3dc606045358b..6c94dc61b195d 100644 --- a/llvm/lib/Object/ModuleSymbolTable.cpp +++ b/llvm/lib/Object/ModuleSymbolTable.cpp @@ -50,8 +50,7 @@ using namespace llvm; using namespace object; -void ModuleSymbolTable::addModule(Module *M, StringRef CPU, - StringRef Features) { +void ModuleSymbolTable::addModule(Module *M) { if (FirstMod) assert(FirstMod->getTargetTriple() == M->getTargetTriple()); else @@ -60,13 +59,10 @@ void ModuleSymbolTable::addModule(Module *M, StringRef CPU, for (GlobalValue &GV : M->global_values()) SymTab.push_back(&GV); - CollectAsmSymbols( - *M, - [this](StringRef Name, BasicSymbolRef::Flags Flags) { - SymTab.push_back(new (AsmSymbols.Allocate()) - AsmSymbol(std::string(Name), Flags)); - }, - /*DiagHandler=*/nullptr, CPU, Features); + CollectAsmSymbols(*M, [this](StringRef Name, BasicSymbolRef::Flags Flags) { + SymTab.push_back(new (AsmSymbols.Allocate()) + AsmSymbol(std::string(Name), Flags)); + }); } static void initializeRecordStreamer( @@ -200,9 +196,7 @@ addSymbols(RecordStreamer &Streamer, void ModuleSymbolTable::CollectAsmSymbols( const Module &M, - function_ref<void(StringRef, BasicSymbolRef::Flags)> AsmSymbol, - function_ref<void(const DiagnosticInfo &DI)> DiagHandler, StringRef CPU, - StringRef Features) { + function_ref<void(StringRef, BasicSymbolRef::Flags)> AsmSymbol) { MDTuple *SymbolsMD = dyn_cast_if_present<MDTuple>(M.getModuleFlag("global-asm-symbols")); @@ -216,13 +210,14 @@ void ModuleSymbolTable::CollectAsmSymbols( AsmSymbol(Name->getString(), 
static_cast<BasicSymbolRef::Flags>(Flags->getZExtValue())); } + addSpecialSymbols(M, AsmSymbol); return; } initializeRecordStreamer( - M, CPU, Features, + M, /*CPU=*/"", /*Features=*/"", [&](RecordStreamer &Streamer) { addSymbols(Streamer, AsmSymbol); }, - DiagHandler); + /*DiagHandler=*/nullptr); addSpecialSymbols(M, AsmSymbol); } @@ -235,9 +230,7 @@ static void addSymvers(RecordStreamer &Streamer, } void ModuleSymbolTable::CollectAsmSymvers( - const Module &M, function_ref<void(StringRef, StringRef)> AsmSymver, - function_ref<void(const DiagnosticInfo &DI)> DiagHandler, StringRef CPU, - StringRef Features) { + const Module &M, function_ref<void(StringRef, StringRef)> AsmSymver) { MDTuple *SymversMD = dyn_cast_if_present<MDTuple>(M.getModuleFlag("global-asm-symvers")); @@ -254,9 +247,9 @@ void ModuleSymbolTable::CollectAsmSymvers( } initializeRecordStreamer( - M, CPU, Features, + M, /*CPU=*/"", /*Features=*/"", [&](RecordStreamer &Streamer) { addSymvers(Streamer, AsmSymver); }, - DiagHandler); + /*DiagHandler=*/nullptr); } bool ModuleSymbolTable::EmitModuleFlags(Module &M, StringRef CPU, @@ -269,9 +262,9 @@ bool ModuleSymbolTable::EmitModuleFlags(Module &M, StringRef CPU, // Ignore diagnostics from the assembly parser. // // Errors in assembly mean that we cannot build a symbol table - // from it. However, we do not diagnose them here in Clang, - // because we don't know if the Module is ever going to actually - // reach CodeGen where this would matter. + // from it. However, we do not diagnose them here, because we + // don't know if the Module is ever going to actually reach + // CodeGen where this would matter. 
if (DI.getSeverity() == llvm::DS_Error) HaveErrors = true; }; @@ -294,15 +287,15 @@ bool ModuleSymbolTable::EmitModuleFlags(Module &M, StringRef CPU, auto AsmSymver = [&](StringRef Name, StringRef Alias) { auto ItNew = SymversMap.try_emplace(Name); SmallVector<llvm::Metadata *, 2> &Aliases = ItNew.first->second; - if (ItNew.second) { - // If it is a new list, insert the primary name at the - // front. + + // If it is a new list, insert the primary name at the front. + if (ItNew.second) Aliases.push_back(llvm::MDString::get(Ctx, Name)); - } + Aliases.push_back(llvm::MDString::get(Ctx, Alias)); }; - // Parse global inline assembly and collect all symbols and symvers + // Parse global inline assembly and collect all symbols and symvers. initializeRecordStreamer( M, CPU, Features, [&](RecordStreamer &Streamer) { @@ -311,11 +304,11 @@ bool ModuleSymbolTable::EmitModuleFlags(Module &M, StringRef CPU, }, DiagHandler); - if (HaveErrors) { + if (HaveErrors) return false; - } - addSpecialSymbols(M, AsmSymbol); + // Emit a symbol table as module flags, so they can be traversed + // later with CollectAsmSymbols and CollectAsmSymvers. 
if (!Symbols.empty()) { M.addModuleFlag(llvm::Module::Append, "global-asm-symbols", >From 4e9d2649712ba8a28552bb430c332333af564a03 Mon Sep 17 00:00:00 2001 From: Andrew Savonichev <[email protected]> Date: Mon, 26 Jan 2026 14:46:42 +0900 Subject: [PATCH 05/12] Add a RISC-V test from #67698 --- clang/test/CodeGen/RISCV/global-inline-asm-flags.c | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 clang/test/CodeGen/RISCV/global-inline-asm-flags.c diff --git a/clang/test/CodeGen/RISCV/global-inline-asm-flags.c b/clang/test/CodeGen/RISCV/global-inline-asm-flags.c new file mode 100644 index 0000000000000..a0f699f5c52d6 --- /dev/null +++ b/clang/test/CodeGen/RISCV/global-inline-asm-flags.c @@ -0,0 +1,10 @@ +// RUN: %clang_cc1 -triple riscv64 -target-feature +zcmp -flto=full -emit-llvm -o - %s | FileCheck %s +// REQUIRES: riscv-registered-target + +asm(".globl func; func: cm.mvsa01 s1, s0; ret"); + +// CHECK: module asm ".globl func; func: cm.mvsa01 s1, s0; ret" + +// CHECK: !{{.*}} = !{i32 5, !"global-asm-symbols", ![[SYM:[0-9]+]]} +// CHECK: ![[SYM]] = !{![[FUNC:[0-9]+]]} +// CHECK: ![[FUNC]] = !{!"func", i32 2050} >From e9a734750843d2ff21ba32ea28e24b620dba8fbf Mon Sep 17 00:00:00 2001 From: Andrew Savonichev <[email protected]> Date: Tue, 27 Jan 2026 11:16:16 +0900 Subject: [PATCH 06/12] Fix code style --- llvm/lib/Object/ModuleSymbolTable.cpp | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Object/ModuleSymbolTable.cpp b/llvm/lib/Object/ModuleSymbolTable.cpp index 6c94dc61b195d..48a62c6de17f0 100644 --- a/llvm/lib/Object/ModuleSymbolTable.cpp +++ b/llvm/lib/Object/ModuleSymbolTable.cpp @@ -235,21 +235,20 @@ void ModuleSymbolTable::CollectAsmSymvers( MDTuple *SymversMD = dyn_cast_if_present<MDTuple>(M.getModuleFlag("global-asm-symvers")); - if (SymversMD) { - for (const Metadata *MD : SymversMD->operands()) { - const MDTuple *SymverMD = cast<MDTuple>(MD); - StringRef Name = 
cast<MDString>(SymverMD->getOperand(0))->getString(); - for (size_t i = 1, End = SymverMD->getNumOperands(); i < End; ++i) { - AsmSymver(Name, cast<MDString>(SymverMD->getOperand(i))->getString()); - } - } + if (!SymversMD) { + initializeRecordStreamer( + M, /*CPU=*/"", /*Features=*/"", + [&](RecordStreamer &Streamer) { addSymvers(Streamer, AsmSymver); }, + /*DiagHandler=*/nullptr); return; } - initializeRecordStreamer( - M, /*CPU=*/"", /*Features=*/"", - [&](RecordStreamer &Streamer) { addSymvers(Streamer, AsmSymver); }, - /*DiagHandler=*/nullptr); + for (const Metadata *MD : SymversMD->operands()) { + const MDTuple *SymverMD = cast<MDTuple>(MD); + StringRef Name = cast<MDString>(SymverMD->getOperand(0))->getString(); + for (size_t Idx = 1, End = SymverMD->getNumOperands(); Idx < End; ++Idx) + AsmSymver(Name, cast<MDString>(SymverMD->getOperand(Idx))->getString()); + } } bool ModuleSymbolTable::EmitModuleFlags(Module &M, StringRef CPU, @@ -319,9 +318,8 @@ bool ModuleSymbolTable::EmitModuleFlags(Module &M, StringRef CPU, if (!SymversMap.empty()) { SmallVector<llvm::Metadata *, 16> Symvers; Symvers.reserve(SymversMap.size()); - for (const auto &KV : SymversMap) { + for (const auto &KV : SymversMap) Symvers.push_back(llvm::MDNode::get(Ctx, KV.second)); - } M.addModuleFlag(llvm::Module::Append, "global-asm-symvers", llvm::MDNode::get(Ctx, Symvers)); >From 72e2f571c2f3ba38379666c78350925b45c9e3a7 Mon Sep 17 00:00:00 2001 From: Andrew Savonichev <[email protected]> Date: Tue, 27 Jan 2026 13:46:54 +0900 Subject: [PATCH 07/12] Materialize a module before trying to collect symbols, add a test for llvm-lto2 --- llvm/lib/Object/ModuleSymbolTable.cpp | 51 +++++++++++-------- .../AArch64/Inputs/global-inline-asm-flags.ll | 4 +- .../LTO/AArch64/global-inline-asm-flags.ll | 13 +++++ 3 files changed, 46 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Object/ModuleSymbolTable.cpp b/llvm/lib/Object/ModuleSymbolTable.cpp index 48a62c6de17f0..b9c08c081424c 100644 --- 
a/llvm/lib/Object/ModuleSymbolTable.cpp +++ b/llvm/lib/Object/ModuleSymbolTable.cpp @@ -50,6 +50,24 @@ using namespace llvm; using namespace object; +static void addSpecialSymbols( + const Module &M, + function_ref<void(StringRef, BasicSymbolRef::Flags)> AsmSymbol) { + // In ELF, object code generated for x86-32 and some code models of x86-64 may + // reference the special symbol _GLOBAL_OFFSET_TABLE_ that is not used in the + // IR. Record it like inline asm symbols. + Triple TT(M.getTargetTriple()); + if (!TT.isOSBinFormatELF() || !TT.isX86()) + return; + auto CM = M.getCodeModel(); + if (TT.getArch() == Triple::x86 || CM == CodeModel::Medium || + CM == CodeModel::Large) { + AsmSymbol("_GLOBAL_OFFSET_TABLE_", + BasicSymbolRef::Flags(BasicSymbolRef::SF_Undefined | + BasicSymbolRef::SF_Global)); + } +} + void ModuleSymbolTable::addModule(Module *M) { if (FirstMod) assert(FirstMod->getTargetTriple() == M->getTargetTriple()); @@ -59,10 +77,21 @@ void ModuleSymbolTable::addModule(Module *M) { for (GlobalValue &GV : M->global_values()) SymTab.push_back(&GV); - CollectAsmSymbols(*M, [this](StringRef Name, BasicSymbolRef::Flags Flags) { + auto AddSymbols = [this](StringRef Name, BasicSymbolRef::Flags Flags) { SymTab.push_back(new (AsmSymbols.Allocate()) AsmSymbol(std::string(Name), Flags)); - }); + }; + + if (M->getModuleInlineAsm().empty()) { + addSpecialSymbols(*M, AddSymbols); + return; + } + + // Make sure that global-asm-symbols is materialized. Otherwise + // CollectAsmSymbols falls back to parsing. 
+ consumeError(M->materializeMetadata()); + + CollectAsmSymbols(*M, AddSymbols); } static void initializeRecordStreamer( @@ -143,24 +172,6 @@ static void initializeRecordStreamer( Init(Streamer); } -static void addSpecialSymbols( - const Module &M, - function_ref<void(StringRef, BasicSymbolRef::Flags)> AsmSymbol) { - // In ELF, object code generated for x86-32 and some code models of x86-64 may - // reference the special symbol _GLOBAL_OFFSET_TABLE_ that is not used in the - // IR. Record it like inline asm symbols. - Triple TT(M.getTargetTriple()); - if (!TT.isOSBinFormatELF() || !TT.isX86()) - return; - auto CM = M.getCodeModel(); - if (TT.getArch() == Triple::x86 || CM == CodeModel::Medium || - CM == CodeModel::Large) { - AsmSymbol("_GLOBAL_OFFSET_TABLE_", - BasicSymbolRef::Flags(BasicSymbolRef::SF_Undefined | - BasicSymbolRef::SF_Global)); - } -} - static void addSymbols(RecordStreamer &Streamer, function_ref<void(StringRef, BasicSymbolRef::Flags)> AsmSymbol) { diff --git a/llvm/test/LTO/AArch64/Inputs/global-inline-asm-flags.ll b/llvm/test/LTO/AArch64/Inputs/global-inline-asm-flags.ll index 349d8e37fc488..9d932b6d5bb7c 100644 --- a/llvm/test/LTO/AArch64/Inputs/global-inline-asm-flags.ll +++ b/llvm/test/LTO/AArch64/Inputs/global-inline-asm-flags.ll @@ -1,5 +1,5 @@ -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32" -target triple = "aarch64-none-linux-gnu" +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" module asm ".text" module asm ".balign 16" diff --git a/llvm/test/LTO/AArch64/global-inline-asm-flags.ll b/llvm/test/LTO/AArch64/global-inline-asm-flags.ll index 173cc014c8852..0526fae006f3e 100644 --- a/llvm/test/LTO/AArch64/global-inline-asm-flags.ll +++ b/llvm/test/LTO/AArch64/global-inline-asm-flags.ll @@ -2,6 +2,16 @@ ; RUN: llvm-as %p/Inputs/global-inline-asm-flags.ll -o %t2.bc ; RUN: llvm-lto -save-merged-module 
-filetype=asm -mattr=+pauth %t1.bc %t2.bc -o %t3 ; RUN: llvm-dis %t3.merged.bc -o - | FileCheck %s +; RUN: llvm-lto2 run -save-temps -mattr=+pauth -filetype=asm -o %t4.s %t1.bc %t2.bc \ +; RUN: -r=%t1.bc,baz,p \ +; RUN: -r=%t1.bc,baz@VER,p \ +; RUN: -r=%t1.bc,foo@LINKEDVER,p \ +; RUN: -r=%t2.bc,bar,p \ +; RUN: -r=%t2.bc,bar@VER,p \ +; RUN: -r=%t2.bc,foo@ANOTHERVER,p \ +; RUN: -r=%t2.bc,foo,p \ +; RUN: -r=%t2.bc,foo@VER,p +; RUN: llvm-dis %t4.s.0.5.precodegen.bc -o - | FileCheck %s ; Note that -mattr=+pauth for llvm-lto is still required, because it ; runs full CodeGen at the end. Symbols and Symvers are still @@ -46,6 +56,9 @@ ; CHECK: ![[VFOO2]] = !{!"foo", !"foo@VER", !"foo@ANOTHERVER"} ; CHECK: ![[VBAR]] = !{!"bar", !"bar@VER"} +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + module asm ".text" module asm ".balign 16" module asm ".globl baz" >From 0a6e4e3c1f76a8597a7ebc109d1728b56c437ea5 Mon Sep 17 00:00:00 2001 From: Andrew Savonichev <[email protected]> Date: Tue, 27 Jan 2026 14:32:42 +0900 Subject: [PATCH 08/12] Add checks for a list of symbol names --- .../LTO/AArch64/global-inline-asm-flags.ll | 26 ++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/llvm/test/LTO/AArch64/global-inline-asm-flags.ll b/llvm/test/LTO/AArch64/global-inline-asm-flags.ll index 0526fae006f3e..860c6d4116510 100644 --- a/llvm/test/LTO/AArch64/global-inline-asm-flags.ll +++ b/llvm/test/LTO/AArch64/global-inline-asm-flags.ll @@ -1,7 +1,9 @@ ; RUN: llvm-as %s -o %t1.bc ; RUN: llvm-as %p/Inputs/global-inline-asm-flags.ll -o %t2.bc + ; RUN: llvm-lto -save-merged-module -filetype=asm -mattr=+pauth %t1.bc %t2.bc -o %t3 ; RUN: llvm-dis %t3.merged.bc -o - | FileCheck %s + ; RUN: llvm-lto2 run -save-temps -mattr=+pauth -filetype=asm -o %t4.s %t1.bc %t2.bc \ ; RUN: -r=%t1.bc,baz,p \ ; RUN: -r=%t1.bc,baz@VER,p \ @@ -13,10 +15,28 @@ ; RUN: -r=%t2.bc,foo@VER,p ; RUN: llvm-dis 
%t4.s.0.5.precodegen.bc -o - | FileCheck %s -; Note that -mattr=+pauth for llvm-lto is still required, because it -; runs full CodeGen at the end. Symbols and Symvers are still -; extracted from metadata. +; Note that -mattr=+pauth option for llvm-lto and llvm-lto2 is still +; required, because LTO runs full CodeGen at the end. Symbols and +; Symvers are still extracted from metadata. + +; RUN: llvm-nm %t1.bc | FileCheck %s --check-prefix NM1 +; RUN: llvm-nm %t2.bc | FileCheck %s --check-prefix NM2 +; RUN: llvm-nm %t3.merged.bc | FileCheck %s --check-prefixes NM1,NM2 +; RUN: llvm-nm %t4.s.0.5.precodegen.bc | FileCheck %s --check-prefixes NM1,NM2 + +; Symbols of the first module +; NM1-DAG: U baz +; NM1-DAG: U baz@VER +; NM1-DAG: U foo@LINKEDVER + +; Symbols of the second module +; NM2-DAG: U bar +; NM2-DAG: U bar@VER +; NM2-DAG: U foo +; NM2-DAG: U foo@ANOTHERVER +; NM2-DAG: U foo@VER +; IR with two modules linked ; CHECK: module asm ".text" ; CHECK: module asm ".balign 16" ; CHECK: module asm ".globl baz" >From 0e4bbe047d68d3eea10b92ce905400cac0538994 Mon Sep 17 00:00:00 2001 From: Andrew Savonichev <[email protected]> Date: Tue, 27 Jan 2026 17:53:22 +0900 Subject: [PATCH 09/12] Always emit symbols and symvers metadata if inline assembly is present --- .../CodeGen/RISCV/global-inline-asm-flags.c | 2 ++ llvm/lib/Object/ModuleSymbolTable.cpp | 26 +++++++------------ .../test/LTO/RISCV/global-inline-asm-flags.ll | 26 +++++++++++++++++++ 3 files changed, 37 insertions(+), 17 deletions(-) create mode 100644 llvm/test/LTO/RISCV/global-inline-asm-flags.ll diff --git a/clang/test/CodeGen/RISCV/global-inline-asm-flags.c b/clang/test/CodeGen/RISCV/global-inline-asm-flags.c index a0f699f5c52d6..65fefac4d453a 100644 --- a/clang/test/CodeGen/RISCV/global-inline-asm-flags.c +++ b/clang/test/CodeGen/RISCV/global-inline-asm-flags.c @@ -8,3 +8,5 @@ asm(".globl func; func: cm.mvsa01 s1, s0; ret"); // CHECK: !{{.*}} = !{i32 5, !"global-asm-symbols", ![[SYM:[0-9]+]]} // CHECK: 
![[SYM]] = !{![[FUNC:[0-9]+]]} // CHECK: ![[FUNC]] = !{!"func", i32 2050} +// CHECK: !{{.*}} = !{i32 5, !"global-asm-symvers", ![[SYMVERS:[0-9]+]]} +// CHECK: ![[SYMVERS]] = !{} diff --git a/llvm/lib/Object/ModuleSymbolTable.cpp b/llvm/lib/Object/ModuleSymbolTable.cpp index b9c08c081424c..3c1fcb514a726 100644 --- a/llvm/lib/Object/ModuleSymbolTable.cpp +++ b/llvm/lib/Object/ModuleSymbolTable.cpp @@ -264,7 +264,6 @@ void ModuleSymbolTable::CollectAsmSymvers( bool ModuleSymbolTable::EmitModuleFlags(Module &M, StringRef CPU, StringRef Features) { - bool Changed = false; llvm::LLVMContext &Ctx = M.getContext(); bool HaveErrors = false; @@ -319,25 +318,18 @@ bool ModuleSymbolTable::EmitModuleFlags(Module &M, StringRef CPU, // Emit a symbol table as module flags, so they can be traversed // later with CollectAsmSymbols and CollectAsmSymvers. + M.addModuleFlag(llvm::Module::Append, "global-asm-symbols", + llvm::MDNode::get(Ctx, Symbols)); - if (!Symbols.empty()) { - M.addModuleFlag(llvm::Module::Append, "global-asm-symbols", - llvm::MDNode::get(Ctx, Symbols)); - Changed = true; - } - - if (!SymversMap.empty()) { - SmallVector<llvm::Metadata *, 16> Symvers; - Symvers.reserve(SymversMap.size()); - for (const auto &KV : SymversMap) - Symvers.push_back(llvm::MDNode::get(Ctx, KV.second)); + SmallVector<llvm::Metadata *, 16> Symvers; + Symvers.reserve(SymversMap.size()); + for (const auto &KV : SymversMap) + Symvers.push_back(llvm::MDNode::get(Ctx, KV.second)); - M.addModuleFlag(llvm::Module::Append, "global-asm-symvers", - llvm::MDNode::get(Ctx, Symvers)); - Changed = true; - } + M.addModuleFlag(llvm::Module::Append, "global-asm-symvers", + llvm::MDNode::get(Ctx, Symvers)); - return Changed; + return true; } void ModuleSymbolTable::printSymbolName(raw_ostream &OS, Symbol S) const { diff --git a/llvm/test/LTO/RISCV/global-inline-asm-flags.ll b/llvm/test/LTO/RISCV/global-inline-asm-flags.ll new file mode 100644 index 0000000000000..94c1801ac9ab2 --- /dev/null +++ 
b/llvm/test/LTO/RISCV/global-inline-asm-flags.ll @@ -0,0 +1,26 @@ +; RUN: llvm-as %s -o %t.o +; RUN: llvm-lto2 run -mattr=+zcmp -filetype=asm -o %t.s %t.o -r=%t.o,func +; RUN: llvm-nm %t.o | FileCheck %s --check-prefix NM + +; NM: T func + +; CHECK: cm.mvsa01 s1, s0 +; CHECK-NEXT: ret + + +target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128" +target triple = "riscv64" + +module asm ".globl func; func: cm.mvsa01 s1, s0; ret" + +!llvm.module.flags = !{!0, !1, !2, !4, !7} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, !"target-abi", !"lp64"} +!2 = !{i32 6, !"riscv-isa", !3} +!3 = !{!"rv64i2p1_c2p0_zca1p0_zcmp1p0"} +!4 = !{i32 5, !"global-asm-symbols", !5} +!5 = !{!6} +!6 = !{!"func", i32 2050} +!7 = !{i32 5, !"global-asm-symvers", !8} +!8 = !{} >From ae858b8f41150eafe15395357e0bfb9088a77d8e Mon Sep 17 00:00:00 2001 From: Andrew Savonichev <[email protected]> Date: Tue, 27 Jan 2026 18:41:53 +0900 Subject: [PATCH 10/12] Add labels to define symbols in inline assembly --- .../CodeGen/AArch64/global-inline-asm-flags.c | 14 ++++--- .../AArch64/Inputs/global-inline-asm-flags.ll | 12 +++--- .../LTO/AArch64/global-inline-asm-flags.ll | 42 ++++++++++--------- 3 files changed, 39 insertions(+), 29 deletions(-) diff --git a/clang/test/CodeGen/AArch64/global-inline-asm-flags.c b/clang/test/CodeGen/AArch64/global-inline-asm-flags.c index 7d4e82d95e09a..432e306b3eac7 100644 --- a/clang/test/CodeGen/AArch64/global-inline-asm-flags.c +++ b/clang/test/CodeGen/AArch64/global-inline-asm-flags.c @@ -5,11 +5,13 @@ asm ( ".text" "\n" ".balign 16" "\n" ".globl foo\n" + "foo:\n" "pacib x30, x27" "\n" "retab" "\n" ".symver foo, foo@VER" "\n" ".symver foo, foo@ANOTHERVER" "\n" ".globl bar\n" + "bar:\n" "pacib x30, x27" "\n" "retab" "\n" ".symver bar, bar@VER" "\n" @@ -19,11 +21,13 @@ asm ( // CHECK: module asm ".text" // CHECK: module asm ".balign 16" // CHECK: module asm ".globl foo" +// CHECK: module asm "foo:" // CHECK: module asm "pacib x30, x27" // CHECK: module asm 
"retab" // CHECK: module asm ".symver foo, foo@VER" // CHECK: module asm ".symver foo, foo@ANOTHERVER" // CHECK: module asm ".globl bar" +// CHECK: module asm "bar:" // CHECK: module asm "pacib x30, x27" // CHECK: module asm "retab" // CHECK: module asm ".symver bar, bar@VER" @@ -31,11 +35,11 @@ asm ( // CHECK: !{{.*}} = !{i32 5, !"global-asm-symbols", ![[SYM:[0-9]+]]} // CHECK: ![[SYM]] = !{![[SBAR1:[0-9]+]], ![[SBAR2:[0-9]+]], ![[SBAR3:[0-9]+]], ![[SFOO1:[0-9]+]], ![[SFOO2:[0-9]+]]} -// CHECK: ![[SBAR1]] = !{!"bar", i32 2051} -// CHECK: ![[SBAR2]] = !{!"bar@VER", i32 2051} -// CHECK: ![[SBAR3]] = !{!"foo@ANOTHERVER", i32 2051} -// CHECK: ![[SFOO1]] = !{!"foo", i32 2051} -// CHECK: ![[SFOO2]] = !{!"foo@VER", i32 2051} +// CHECK: ![[SBAR1]] = !{!"bar", i32 2050} +// CHECK: ![[SBAR2]] = !{!"bar@VER", i32 2050} +// CHECK: ![[SBAR3]] = !{!"foo@ANOTHERVER", i32 2050} +// CHECK: ![[SFOO1]] = !{!"foo", i32 2050} +// CHECK: ![[SFOO2]] = !{!"foo@VER", i32 2050} // CHECK: !{{.*}} = !{i32 5, !"global-asm-symvers", ![[SYMVER:[0-9]+]]} // CHECK: ![[SYMVER]] = !{![[VFOO:[0-9]+]], ![[VBAR:[0-9]+]]} // CHECK: ![[VFOO:[0-9]+]] = !{!"foo", !"foo@VER", !"foo@ANOTHERVER"} diff --git a/llvm/test/LTO/AArch64/Inputs/global-inline-asm-flags.ll b/llvm/test/LTO/AArch64/Inputs/global-inline-asm-flags.ll index 9d932b6d5bb7c..137a7b8ec24ea 100644 --- a/llvm/test/LTO/AArch64/Inputs/global-inline-asm-flags.ll +++ b/llvm/test/LTO/AArch64/Inputs/global-inline-asm-flags.ll @@ -4,11 +4,13 @@ target triple = "aarch64-unknown-linux-gnu" module asm ".text" module asm ".balign 16" module asm ".globl foo" +module asm "foo:" module asm "pacib x30, x27" module asm "retab" module asm ".symver foo, foo@VER" module asm ".symver foo, foo@ANOTHERVER" module asm ".globl bar" +module asm "bar:" module asm "pacib x30, x27" module asm "retab" module asm ".symver bar, bar@VER" @@ -18,11 +20,11 @@ module asm ".previous" !1 = !{i32 5, !"global-asm-symbols", !2} !2 = !{!3, !4, !5, !6, !7} -!3 = !{!"bar", i32 2051} -!4 
= !{!"bar@VER", i32 2051} -!5 = !{!"foo@ANOTHERVER", i32 2051} -!6 = !{!"foo", i32 2051} -!7 = !{!"foo@VER", i32 2051} +!3 = !{!"bar", i32 2050} +!4 = !{!"bar@VER", i32 2050} +!5 = !{!"foo@ANOTHERVER", i32 2050} +!6 = !{!"foo", i32 2050} +!7 = !{!"foo@VER", i32 2050} !8 = !{i32 5, !"global-asm-symvers", !9} !9 = !{!10, !11} !10 = !{!"foo", !"foo@VER", !"foo@ANOTHERVER"} diff --git a/llvm/test/LTO/AArch64/global-inline-asm-flags.ll b/llvm/test/LTO/AArch64/global-inline-asm-flags.ll index 860c6d4116510..269f2a6020314 100644 --- a/llvm/test/LTO/AArch64/global-inline-asm-flags.ll +++ b/llvm/test/LTO/AArch64/global-inline-asm-flags.ll @@ -25,21 +25,22 @@ ; RUN: llvm-nm %t4.s.0.5.precodegen.bc | FileCheck %s --check-prefixes NM1,NM2 ; Symbols of the first module -; NM1-DAG: U baz -; NM1-DAG: U baz@VER -; NM1-DAG: U foo@LINKEDVER +; NM1-DAG: T baz +; NM1-DAG: T baz@VER +; NM1-DAG: T foo@LINKEDVER ; Symbols of the second module -; NM2-DAG: U bar -; NM2-DAG: U bar@VER -; NM2-DAG: U foo -; NM2-DAG: U foo@ANOTHERVER -; NM2-DAG: U foo@VER +; NM2-DAG: T bar +; NM2-DAG: T bar@VER +; NM2-DAG: T foo +; NM2-DAG: T foo@ANOTHERVER +; NM2-DAG: T foo@VER ; IR with two modules linked ; CHECK: module asm ".text" ; CHECK: module asm ".balign 16" ; CHECK: module asm ".globl baz" +; CHECK: module asm "baz:" ; CHECK: module asm "pacib x30, x27" ; CHECK: module asm "retab" ; CHECK: module asm ".symver baz, baz@VER" @@ -48,11 +49,13 @@ ; CHECK: module asm ".text" ; CHECK: module asm ".balign 16" ; CHECK: module asm ".globl foo" +; CHECK: module asm "foo:" ; CHECK: module asm "pacib x30, x27" ; CHECK: module asm "retab" ; CHECK: module asm ".symver foo, foo@VER" ; CHECK: module asm ".symver foo, foo@ANOTHERVER" ; CHECK: module asm ".globl bar" +; CHECK: module asm "bar:" ; CHECK: module asm "pacib x30, x27" ; CHECK: module asm "retab" ; CHECK: module asm ".symver bar, bar@VER" @@ -60,14 +63,14 @@ ; CHECK: !{{[0-9]+}} = distinct !{i32 5, !"global-asm-symbols", ![[SYM:[0-9]+]]} ; CHECK: ![[SYM]] 
= distinct !{![[SBAZ1:[0-9]+]], ![[SBAZ2:[0-9]+]], ![[SFOO1:[0-9]+]], ![[SBAR1:[0-9]+]], ![[SBAR2:[0-9]+]], ![[SFOO2:[0-9]+]], ![[SFOO3:[0-9]+]], ![[SFOO4:[0-9]+]]} -; CHECK: ![[SBAZ1]] = !{!"baz", i32 2051} -; CHECK: ![[SBAZ2]] = !{!"baz@VER", i32 2051} -; CHECK: ![[SFOO1]] = !{!"foo@LINKEDVER", i32 2051} -; CHECK: ![[SBAR1]] = !{!"bar", i32 2051} -; CHECK: ![[SBAR2]] = !{!"bar@VER", i32 2051} -; CHECK: ![[SFOO2]] = !{!"foo@ANOTHERVER", i32 2051} -; CHECK: ![[SFOO3]] = !{!"foo", i32 2051} -; CHECK: ![[SFOO4]] = !{!"foo@VER", i32 2051} +; CHECK: ![[SBAZ1]] = !{!"baz", i32 2050} +; CHECK: ![[SBAZ2]] = !{!"baz@VER", i32 2050} +; CHECK: ![[SFOO1]] = !{!"foo@LINKEDVER", i32 2050} +; CHECK: ![[SBAR1]] = !{!"bar", i32 2050} +; CHECK: ![[SBAR2]] = !{!"bar@VER", i32 2050} +; CHECK: ![[SFOO2]] = !{!"foo@ANOTHERVER", i32 2050} +; CHECK: ![[SFOO3]] = !{!"foo", i32 2050} +; CHECK: ![[SFOO4]] = !{!"foo@VER", i32 2050} ; CHECK: !{{[0-9]+}} = distinct !{i32 5, !"global-asm-symvers", ![[SYMVER:[0-9]+]]} ; CHECK: ![[SYMVER]] = distinct !{![[VBAZ:[0-9]+]], ![[VFOO1:[0-9]+]], ![[VFOO2:[0-9]+]], ![[VBAR:[0-9]+]]} @@ -82,6 +85,7 @@ target triple = "aarch64-unknown-linux-gnu" module asm ".text" module asm ".balign 16" module asm ".globl baz" +module asm "baz:" module asm "pacib x30, x27" module asm "retab" module asm ".symver baz, baz@VER" @@ -92,9 +96,9 @@ module asm ".previous" !0 = !{i32 5, !"global-asm-symbols", !1} !1 = !{!2, !3, !4} -!2 = !{!"baz", i32 2051} -!3 = !{!"baz@VER", i32 2051} -!4 = !{!"foo@LINKEDVER", i32 2051} +!2 = !{!"baz", i32 2050} +!3 = !{!"baz@VER", i32 2050} +!4 = !{!"foo@LINKEDVER", i32 2050} !5 = !{i32 5, !"global-asm-symvers", !6} !6 = !{!7, !8} !7 = !{!"baz", !"baz@VER"} >From 6d3d18080ad1157b4e9773770d60ee2a05912408 Mon Sep 17 00:00:00 2001 From: Andrew Savonichev <[email protected]> Date: Wed, 28 Jan 2026 18:03:55 +0900 Subject: [PATCH 11/12] Add a release note --- llvm/docs/ReleaseNotes.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git 
a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index 74316ccdcaabf..d5f5cf06e1a9f 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -63,6 +63,11 @@ Changes to the LLVM IR intrinsics. These are equivalent to `fptrunc` and `fpext` with half with a bitcast. +* Added `global-asm-symbols` and `global-asm-symvers` module flags to + keep a list of symbols and symvers defined in global inline + assembly. This allows LLVM tools to build a symbol table for a + module without running AsmParser to find symbols in assembly. + Changes to LLVM infrastructure ------------------------------ >From bb8a22f092df2948a2b8ff7385016200a2ca0efd Mon Sep 17 00:00:00 2001 From: Andrew Savonichev <[email protected]> Date: Wed, 28 Jan 2026 18:19:18 +0900 Subject: [PATCH 12/12] Skip EmitModuleFlags if there is no global inline assembly --- clang/lib/CodeGen/CodeGenModule.cpp | 8 +++----- llvm/lib/Object/ModuleSymbolTable.cpp | 3 +++ 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 86eaec3da12d3..d67560eea268d 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -1570,11 +1570,9 @@ void CodeGenModule::Release() { getContext().getTargetInfo().getMaxTLSAlign()); // Emit module flags for global inline assembly symbols. 
- if (!TheModule.getModuleInlineAsm().empty()) { - llvm::ModuleSymbolTable::EmitModuleFlags( - TheModule, getTarget().getTargetOpts().CPU, - llvm::join(getTarget().getTargetOpts().Features, ",")); - } + llvm::ModuleSymbolTable::EmitModuleFlags( + TheModule, getTarget().getTargetOpts().CPU, + llvm::join(getTarget().getTargetOpts().Features, ",")); getTargetCodeGenInfo().emitTargetGlobals(*this); diff --git a/llvm/lib/Object/ModuleSymbolTable.cpp b/llvm/lib/Object/ModuleSymbolTable.cpp index 3c1fcb514a726..f05e029725171 100644 --- a/llvm/lib/Object/ModuleSymbolTable.cpp +++ b/llvm/lib/Object/ModuleSymbolTable.cpp @@ -264,6 +264,9 @@ void ModuleSymbolTable::CollectAsmSymvers( bool ModuleSymbolTable::EmitModuleFlags(Module &M, StringRef CPU, StringRef Features) { + if (M.getModuleInlineAsm().empty()) + return false; + llvm::LLVMContext &Ctx = M.getContext(); bool HaveErrors = false; _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
