llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-lld-elf Author: None (llvmbot) <details> <summary>Changes</summary> Backport bc45ea2c4f24c259814ed5545c403d09ebf89be6 Requested by: @<!-- -->MaskRay --- Full diff: https://github.com/llvm/llvm-project/pull/179867.diff 5 Files Affected: - (modified) lld/ELF/Arch/RISCV.cpp (+18-13) - (modified) lld/ELF/Config.h (+3) - (modified) lld/ELF/Relocations.cpp (+21-41) - (added) lld/test/ELF/loongarch-ifunc-nonpreemptible.s (+70) - (modified) lld/test/ELF/riscv-ifunc-nonpreemptible.s (+92-38) ``````````diff diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp index 7ec75b0d61fce..bdb728bc19e7b 100644 --- a/lld/ELF/Arch/RISCV.cpp +++ b/lld/ELF/Arch/RISCV.cpp @@ -715,28 +715,33 @@ void elf::initSymbolAnchors(Ctx &ctx) { } } } - // Store anchors (st_value and st_value+st_size) for symbols relative to text - // sections. + // Store symbol anchors for adjusting st_value/st_size during relaxation. + // We include symbols where d->file == file for the prevailing copies. // // For a defined symbol foo, we may have `d->file != file` with --wrap=foo. // We should process foo, as the defining object file's symbol table may not - // contain foo after redirectSymbols changed the foo entry to __wrap_foo. To - // avoid adding a Defined that is undefined in one object file, use - // `!d->scriptDefined` to exclude symbols that are definitely not wrapped. + // contain foo after redirectSymbols changed the foo entry to __wrap_foo. Use + // `d->scriptDefined` to include such symbols. // // `relaxAux->anchors` may contain duplicate symbols, but that is fine. + auto addAnchor = [](Defined *d) { + if (auto *sec = dyn_cast_or_null<InputSection>(d->section)) + if (sec->flags & SHF_EXECINSTR && sec->relaxAux) { + // If sec is discarded, relaxAux will be nullptr. 
+ sec->relaxAux->anchors.push_back({d->value, d, false}); + sec->relaxAux->anchors.push_back({d->value + d->size, d, true}); + } + }; for (InputFile *file : ctx.objectFiles) for (Symbol *sym : file->getSymbols()) { auto *d = dyn_cast<Defined>(sym); - if (!d || (d->file != file && !d->scriptDefined)) - continue; - if (auto *sec = dyn_cast_or_null<InputSection>(d->section)) - if (sec->flags & SHF_EXECINSTR && sec->relaxAux) { - // If sec is discarded, relaxAux will be nullptr. - sec->relaxAux->anchors.push_back({d->value, d, false}); - sec->relaxAux->anchors.push_back({d->value + d->size, d, true}); - } + if (d && (d->file == file || d->scriptDefined)) + addAnchor(d); } + // Add anchors for IRELATIVE symbols (see `handleNonPreemptibleIfunc`). + // Their values must be adjusted so IRELATIVE addends remain correct. + for (Defined *d : ctx.irelativeSyms) + addAnchor(d); // Sort anchors by offset so that we can find the closest relocation // efficiently. For a zero size symbol, ensure that its start anchor precedes // its end anchor. For two symbols with anchors at the same offset, their diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h index 8ec5a2c04e71c..7ae6f871fe3e0 100644 --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -668,6 +668,9 @@ struct Ctx : CommonLinkerContext { ElfSym sym{}; std::unique_ptr<SymbolTable> symtab; SmallVector<Symbol *, 0> synthesizedSymbols; + // ifunc resolver symbol clones for IRELATIVE. Linker relaxation adjusts + // these. + SmallVector<Defined *, 0> irelativeSyms; SmallVector<std::unique_ptr<MemoryBuffer>> memoryBuffers; SmallVector<ELFFileBase *, 0> objectFiles; diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 226c4e7907a5e..998471f0784ec 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -1459,42 +1459,25 @@ RelocationBaseSection &elf::getIRelativeSection(Ctx &ctx) { } static bool handleNonPreemptibleIfunc(Ctx &ctx, Symbol &sym, uint16_t flags) { - // Handle a reference to a non-preemptible ifunc. 
These are special in a - // few ways: + // Non-preemptible ifuncs are called via a PLT entry that resolves the actual + // address at runtime. We create an IPLT entry and an IGOTPLT slot. The + // IGOTPLT slot is relocated by an IRELATIVE relocation, whose addend encodes + // the resolver address. At startup, the runtime calls the resolver and + // fills the IGOTPLT slot. // - // - Unlike most non-preemptible symbols, non-preemptible ifuncs do not have - // a fixed value. But assuming that all references to the ifunc are - // GOT-generating or PLT-generating, the handling of an ifunc is - // relatively straightforward. We create a PLT entry in Iplt, which is - // usually at the end of .plt, which makes an indirect call using a - // matching GOT entry in igotPlt, which is usually at the end of .got.plt. - // The GOT entry is relocated using an IRELATIVE relocation in relaDyn, - // which is usually at the end of .rela.dyn. + // For direct (non-GOT/PLT) relocations, the symbol must have a constant + // address. We achieve this by redirecting the symbol to its IPLT entry + // ("canonicalizing" it), so all references see the same address, and the + // resolver is called exactly once. This may result in two GOT entries: one + // in .got.plt for the IRELATIVE, and one in .got pointing to the canonical + // IPLT entry (for GOT-generating relocations). // - // - Despite the fact that an ifunc does not have a fixed value, compilers - // that are not passed -fPIC will assume that they do, and will emit - // direct (non-GOT-generating, non-PLT-generating) relocations to the - // symbol. This means that if a direct relocation to the symbol is - // seen, the linker must set a value for the symbol, and this value must - // be consistent no matter what type of reference is made to the symbol. 
- // This can be done by creating a PLT entry for the symbol in the way - // described above and making it canonical, that is, making all references - // point to the PLT entry instead of the resolver. In lld we also store - // the address of the PLT entry in the dynamic symbol table, which means - // that the symbol will also have the same value in other modules. - // Because the value loaded from the GOT needs to be consistent with - // the value computed using a direct relocation, a non-preemptible ifunc - // may end up with two GOT entries, one in .got.plt that points to the - // address returned by the resolver and is used only by the PLT entry, - // and another in .got that points to the PLT entry and is used by - // GOT-generating relocations. + // We clone the symbol to preserve the original resolver address for the + // IRELATIVE addend. The clone is tracked in ctx.irelativeSyms so that linker + // relaxation can adjust its value when the resolver address changes. // - // - The fact that these symbols do not have a fixed value makes them an - // exception to the general rule that a statically linked executable does - // not require any form of dynamic relocation. To handle these relocations - // correctly, the IRELATIVE relocations are stored in an array which a - // statically linked executable's startup code must enumerate using the - // linker-defined symbols __rela?_iplt_{start,end}. + // Note: IRELATIVE relocations are needed even in static executables; see + // `addRelIpltSymbols`. if (!sym.isGnuIFunc() || sym.isPreemptible || ctx.arg.zIfuncNoplt) return false; // Skip unreferenced non-preemptible ifunc. @@ -1503,17 +1486,14 @@ static bool handleNonPreemptibleIfunc(Ctx &ctx, Symbol &sym, uint16_t flags) { sym.isInIplt = true; - // Create an Iplt and the associated IRELATIVE relocation pointing to the - // original section/value pairs. 
For non-GOT non-PLT relocation case below, we - // may alter section/value, so create a copy of the symbol to make - // section/value fixed. - auto *directSym = makeDefined(cast<Defined>(sym)); - directSym->allocateAux(ctx); + auto *irelativeSym = makeDefined(cast<Defined>(sym)); + irelativeSym->allocateAux(ctx); + ctx.irelativeSyms.push_back(irelativeSym); auto &dyn = getIRelativeSection(ctx); addPltEntry(ctx, *ctx.in.iplt, *ctx.in.igotPlt, dyn, ctx.target->iRelativeRel, - *directSym); + *irelativeSym); sym.allocateAux(ctx); - ctx.symAux.back().pltIdx = ctx.symAux[directSym->auxIdx].pltIdx; + ctx.symAux.back().pltIdx = ctx.symAux[irelativeSym->auxIdx].pltIdx; if (flags & HAS_DIRECT_RELOC) { // Change the value to the IPLT and redirect all references to it. diff --git a/lld/test/ELF/loongarch-ifunc-nonpreemptible.s b/lld/test/ELF/loongarch-ifunc-nonpreemptible.s new file mode 100644 index 0000000000000..109fc9f4f6d35 --- /dev/null +++ b/lld/test/ELF/loongarch-ifunc-nonpreemptible.s @@ -0,0 +1,70 @@ +# REQUIRES: loongarch +# RUN: llvm-mc -filetype=obj -triple=loongarch64 -mattr=+relax %s -o %t.o +# RUN: ld.lld -pie %t.o -o %t +# RUN: llvm-readobj -r %t | FileCheck --check-prefix=RELOC %s +# RUN: llvm-readelf -s %t | FileCheck --check-prefix=SYM %s +# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck --check-prefix=DIS %s + +## ifunc0 has a direct relocation, so it gets canonicalized to the IPLT entry. +## ifunc1 has only a GOT relocation, so its symbol remains in the original section. +## ifunc2 has both direct and GOT relocations, so it gets canonicalized to the IPLT entry. +## All IRELATIVE addends must be correctly adjusted after relaxation. 
+ +# RELOC: .rela.dyn { +# RELOC-NEXT: 0x203E0 R_LARCH_RELATIVE - 0x10300 +# RELOC-NEXT: 0x303E8 R_LARCH_IRELATIVE - 0x102D0 +# RELOC-NEXT: 0x303F0 R_LARCH_IRELATIVE - 0x102D4 +# RELOC-NEXT: 0x303F8 R_LARCH_IRELATIVE - 0x102D8 +# RELOC-NEXT: } + +# SYM: {{0*}}102e0 0 FUNC GLOBAL DEFAULT {{.*}} ifunc0 +# SYM-NEXT: {{0*}}102d4 0 IFUNC GLOBAL DEFAULT {{.*}} ifunc1 +# SYM-NEXT: {{0*}}10300 0 FUNC GLOBAL DEFAULT {{.*}} ifunc2 + +# DIS: <_start>: +# DIS-NEXT: 102a8: bl 36 <func> +# DIS-NEXT: pcalau12i $a0, 0 +# DIS-NEXT: addi.d $a0, $a0, 736 +# DIS-NEXT: pcalau12i $a1, 32 +# DIS-NEXT: ld.d $a1, $a1, 1008 +# DIS-NEXT: pcalau12i $a2, 0 +# DIS-NEXT: addi.d $a2, $a2, 768 +# DIS-NEXT: pcalau12i $a3, 0 +# DIS-NEXT: addi.d $a3, $a3, 768 +# DIS: Disassembly of section .iplt: +# DIS: <ifunc0>: +# DIS-NEXT: 102e0: pcaddu12i $t3, 32 + +.text +.globl _start +_start: + call36 func +.L0: + pcalau12i $a0, %pc_hi20(ifunc0) + addi.d $a0, $a0, %pc_lo12(ifunc0) +.L1: + pcalau12i $a1, %got_pc_hi20(ifunc1) + ld.d $a1, $a1, %got_pc_lo12(ifunc1) +.L2: + pcalau12i $a2, %pc_hi20(ifunc2) + addi.d $a2, $a2, %pc_lo12(ifunc2) +.L3: + pcalau12i $a3, %got_pc_hi20(ifunc2) + ld.d $a3, $a3, %got_pc_lo12(ifunc2) + +.globl func +func: + ret + +## Resolvers are after relaxed code, so their addresses shift due to relaxation. +## The IRELATIVE addends must be adjusted accordingly. 
+.globl ifunc0, ifunc1, ifunc2 +.type ifunc0, @gnu_indirect_function +.type ifunc1, @gnu_indirect_function +.type ifunc2, @gnu_indirect_function +ifunc0: + ret +ifunc1: + ret +ifunc2: + ret diff --git a/lld/test/ELF/riscv-ifunc-nonpreemptible.s b/lld/test/ELF/riscv-ifunc-nonpreemptible.s index eda5548eef8b9..1e564028a8044 100644 --- a/lld/test/ELF/riscv-ifunc-nonpreemptible.s +++ b/lld/test/ELF/riscv-ifunc-nonpreemptible.s @@ -1,70 +1,124 @@ # REQUIRES: riscv -# RUN: llvm-mc -filetype=obj -triple=riscv32 %s -o %t.32.o -# RUN: ld.lld -pie %t.32.o -o %t.32 -# RUN: ld.lld -pie %t.32.o -o %t.32-apply --apply-dynamic-relocs +# RUN: llvm-mc -filetype=obj -triple=riscv32 %s -mattr=+relax -o %t.32.o +# DEFINE: %{layout} = --section-start .rela.dyn=0x1000 -Ttext=0x2000 --section-start=.iplt=0x3000 +# RUN: ld.lld -pie %{layout} %t.32.o -o %t.32 +# RUN: ld.lld -pie %{layout} %t.32.o -o %t.32-apply --apply-dynamic-relocs # RUN: llvm-readobj -r -x .got.plt %t.32 | FileCheck --check-prefixes=RELOC32,NO-APPLY-RELOC32 %s # RUN: llvm-readobj -r -x .got.plt %t.32-apply | FileCheck --check-prefixes=RELOC32,APPLY-RELOC32 %s # RUN: llvm-readelf -s %t.32 | FileCheck --check-prefix=SYM32 %s # RUN: llvm-objdump -d --no-show-raw-insn %t.32 | FileCheck --check-prefix=DIS32 %s -# RUN: llvm-mc -filetype=obj -triple=riscv64 %s -o %t.64.o -# RUN: ld.lld -pie %t.64.o -o %t.64 -# RUN: ld.lld -pie %t.64.o -o %t.64-apply --apply-dynamic-relocs +# RUN: llvm-mc -filetype=obj -triple=riscv64 %s -mattr=+relax -o %t.64.o +# RUN: ld.lld -pie %{layout} %t.64.o -o %t.64 +# RUN: ld.lld -pie %{layout} %t.64.o -o %t.64-apply --apply-dynamic-relocs # RUN: llvm-readobj -r -x .got.plt %t.64 | FileCheck --check-prefixes=RELOC64,NO-APPLY-RELOC64 %s # RUN: llvm-readobj -r -x .got.plt %t.64-apply | FileCheck --check-prefixes=RELOC64,APPLY-RELOC64 %s # RUN: llvm-readelf -s %t.64 | FileCheck --check-prefix=SYM64 %s # RUN: llvm-objdump -d --no-show-raw-insn %t.64 | FileCheck --check-prefix=DIS64 %s +## ifunc0 has a 
direct relocation, so it gets canonicalized to the IPLT entry. +## ifunc1 has only a GOT relocation, so its symbol remains in the original section. +## ifunc2 has both direct and GOT relocations, so it gets canonicalized to the IPLT entry. +## All IRELATIVE addends must be correctly adjusted after relaxation. + # RELOC32: .rela.dyn { -# RELOC32-NEXT: 0x3200 R_RISCV_IRELATIVE - 0x117C +# RELOC32-NEXT: 0x50D8 R_RISCV_RELATIVE - 0x3020 +# RELOC32-NEXT: 0x60DC R_RISCV_IRELATIVE - 0x2028 +# RELOC32-NEXT: 0x60E0 R_RISCV_IRELATIVE - 0x202C +# RELOC32-NEXT: 0x60E4 R_RISCV_IRELATIVE - 0x2030 # RELOC32-NEXT: } # RELOC32-LABEL: Hex dump of section '.got.plt': -# NO-APPLY-RELOC32: 0x00003200 00000000 -# APPLY-RELOC32: 0x00003200 7c110000 -# RELOC32-EMPTY: +# NO-APPLY-RELOC32: 0x000060dc 00000000 00000000 00000000 +# APPLY-RELOC32: 0x000060dc 28200000 2c200000 30200000 -# SYM32: 0001190 0 FUNC GLOBAL DEFAULT {{.*}} func +# SYM32: {{0*}}3000 0 FUNC GLOBAL DEFAULT {{.*}} ifunc0 +# SYM32-NEXT: {{0*}}202c 0 IFUNC GLOBAL DEFAULT {{.*}} ifunc1 +# SYM32-NEXT: {{0*}}3020 0 FUNC GLOBAL DEFAULT {{.*}} ifunc2 # DIS32: <_start>: -# DIS32-NEXT: 1180: auipc a0, 0x0 -# DIS32-NEXT: addi a0, a0, 0x10 +# DIS32-NEXT: 2000: jal 0x2024 <func> +# DIS32: <.L0>: +# DIS32-NEXT: 2004: auipc a0, 0x1 +# DIS32-NEXT: addi a0, a0, -0x4 +# DIS32: <.L1>: +# DIS32-NEXT: 200c: auipc a1, 0x4 +# DIS32-NEXT: addi a1, a1, 0xd4 +# DIS32: <.L2>: +# DIS32-NEXT: 2014: auipc a2, 0x1 +# DIS32-NEXT: addi a2, a2, 0xc +# DIS32: <.L3>: +# DIS32-NEXT: 201c: auipc a3, 0x3 +# DIS32-NEXT: addi a3, a3, 0xbc # DIS32: Disassembly of section .iplt: -# DIS32: <func>: -## 32-bit: &.got.plt[func]-. = 0x3200-0x1190 = 4096*2+0x70 -# DIS32-NEXT: 1190: auipc t3, 0x2 -# DIS32-NEXT: lw t3, 0x70(t3) -# DIS32-NEXT: jalr t1, t3 -# DIS32-NEXT: nop +# DIS32: <ifunc0>: +## 32-bit: &.got.plt[ifunc0]-. 
= 0x60dc-0x3000 = 4096*3+0xdc +# DIS32-NEXT: 3000: auipc t3, 0x3 +# DIS32-NEXT: lw t3, 0xdc(t3) # RELOC64: .rela.dyn { -# RELOC64-NEXT: 0x3340 R_RISCV_IRELATIVE - 0x1260 +# RELOC64-NEXT: 0x5150 R_RISCV_RELATIVE - 0x3020 +# RELOC64-NEXT: 0x6158 R_RISCV_IRELATIVE - 0x2028 +# RELOC64-NEXT: 0x6160 R_RISCV_IRELATIVE - 0x202C +# RELOC64-NEXT: 0x6168 R_RISCV_IRELATIVE - 0x2030 # RELOC64-NEXT: } # RELOC64-LABEL: Hex dump of section '.got.plt': -# NO-APPLY-RELOC64: 0x00003340 00000000 00000000 -# APPLY-RELOC64: 0x00003340 60120000 00000000 -# RELOC64-EMPTY: +# NO-APPLY-RELOC64: 0x00006158 00000000 00000000 00000000 00000000 +# APPLY-RELOC64: 0x00006158 28200000 00000000 2c200000 00000000 -# SYM64: 000000000001270 0 FUNC GLOBAL DEFAULT {{.*}} func +# SYM64: {{0*}}3000 0 FUNC GLOBAL DEFAULT {{.*}} ifunc0 +# SYM64-NEXT: {{0*}}202c 0 IFUNC GLOBAL DEFAULT {{.*}} ifunc1 +# SYM64-NEXT: {{0*}}3020 0 FUNC GLOBAL DEFAULT {{.*}} ifunc2 # DIS64: <_start>: -# DIS64-NEXT: 1264: auipc a0, 0x0 -# DIS64-NEXT: addi a0, a0, 0xc +# DIS64-NEXT: 2000: jal 0x2024 <func> +# DIS64: <.L0>: +# DIS64-NEXT: 2004: auipc a0, 0x1 +# DIS64-NEXT: addi a0, a0, -0x4 +# DIS64: <.L1>: +# DIS64-NEXT: 200c: auipc a1, 0x4 +# DIS64-NEXT: addi a1, a1, 0x154 +# DIS64: <.L2>: +# DIS64-NEXT: 2014: auipc a2, 0x1 +# DIS64-NEXT: addi a2, a2, 0xc +# DIS64: <.L3>: +# DIS64-NEXT: 201c: auipc a3, 0x3 +# DIS64-NEXT: addi a3, a3, 0x134 # DIS64: Disassembly of section .iplt: -# DIS64: <func>: -## 64-bit: &.got.plt[func]-. = 0x3340-0x1270 = 4096*2+0xd0 -# DIS64-NEXT: 1270: auipc t3, 0x2 -# DIS64-NEXT: ld t3, 0xd0(t3) -# DIS64-NEXT: jalr t1, t3 -# DIS64-NEXT: nop +# DIS64: <ifunc0>: +## 64-bit: &.got.plt[ifunc0]-. 
= 0x6158-0x3000 = 4096*3+0x158 +# DIS64-NEXT: 3000: auipc t3, 0x3 +# DIS64-NEXT: ld t3, 0x158(t3) .text +.globl _start +_start: + call func +.L0: + auipc a0, %pcrel_hi(ifunc0) + addi a0, a0, %pcrel_lo(.L0) +.L1: + auipc a1, %got_pcrel_hi(ifunc1) + addi a1, a1, %pcrel_lo(.L1) +.L2: + auipc a2, %pcrel_hi(ifunc2) + addi a2, a2, %pcrel_lo(.L2) +.L3: + auipc a3, %got_pcrel_hi(ifunc2) + addi a3, a3, %pcrel_lo(.L3) + .globl func -.type func, @gnu_indirect_function func: ret -.globl _start -_start: -.L: - auipc a0, %pcrel_hi(func) - addi a0, a0, %pcrel_lo(.L) +## Resolvers are after relaxed code, so their addresses shift due to relaxation. +## The IRELATIVE addends must be adjusted accordingly. +.globl ifunc0, ifunc1, ifunc2 +.type ifunc0, @gnu_indirect_function +.type ifunc1, @gnu_indirect_function +.type ifunc2, @gnu_indirect_function +ifunc0: + ret +ifunc1: + ret +ifunc2: + ret `````````` </details> https://github.com/llvm/llvm-project/pull/179867 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
