https://github.com/phoebewang updated https://github.com/llvm/llvm-project/pull/83136
>From cdc9ee6c322af0ceed162f3f714bcd0a22e020c3 Mon Sep 17 00:00:00 2001 From: Phoebe Wang <phoebe.w...@intel.com> Date: Tue, 27 Feb 2024 22:16:38 +0800 Subject: [PATCH 1/8] [X86] Add Support for X86 TLSDESC Relocations --- clang/lib/Driver/ToolChains/CommonArgs.cpp | 3 +- clang/test/Driver/tls-dialect.c | 2 +- .../lib/Target/X86/MCTargetDesc/X86BaseInfo.h | 14 ++ llvm/lib/Target/X86/X86AsmPrinter.cpp | 2 + llvm/lib/Target/X86/X86ISelLowering.cpp | 98 +++++++---- llvm/lib/Target/X86/X86MCInstLower.cpp | 30 +++- llvm/test/CodeGen/X86/tls-desc.ll | 165 ++++++++++++++++++ 7 files changed, 273 insertions(+), 41 deletions(-) create mode 100644 llvm/test/CodeGen/X86/tls-desc.ll diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index faceee85a2f8dc..c66e3ee12e50c4 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -740,7 +740,8 @@ bool tools::isTLSDESCEnabled(const ToolChain &TC, SupportedArgument = V == "desc" || V == "trad"; EnableTLSDESC = V == "desc"; } else if (Triple.isX86()) { - SupportedArgument = V == "gnu"; + SupportedArgument = V == "gnu" || V == "gnu2"; + EnableTLSDESC = V == "gnu2"; } else { Unsupported = true; } diff --git a/clang/test/Driver/tls-dialect.c b/clang/test/Driver/tls-dialect.c index f73915b28ec2a3..a808dd81531ce7 100644 --- a/clang/test/Driver/tls-dialect.c +++ b/clang/test/Driver/tls-dialect.c @@ -2,6 +2,7 @@ // RUN: %clang -### --target=riscv64-linux -mtls-dialect=trad %s 2>&1 | FileCheck --check-prefix=NODESC %s // RUN: %clang -### --target=riscv64-linux %s 2>&1 | FileCheck --check-prefix=NODESC %s // RUN: %clang -### --target=x86_64-linux -mtls-dialect=gnu %s 2>&1 | FileCheck --check-prefix=NODESC %s +// RUN: %clang -### --target=x86_64-linux -mtls-dialect=gnu2 %s 2>&1 | FileCheck --check-prefix=DESC %s /// Android supports TLSDESC by default on RISC-V /// TLSDESC is not on by default in Linux, even on RISC-V, and is covered above @@ 
-18,7 +19,6 @@ /// Unsupported argument // RUN: not %clang -### --target=riscv64-linux -mtls-dialect=gnu2 %s 2>&1 | FileCheck --check-prefix=UNSUPPORTED-ARG %s -// RUN: not %clang -### --target=x86_64-linux -mtls-dialect=gnu2 %s 2>&1 | FileCheck --check-prefix=UNSUPPORTED-ARG %s // DESC: "-cc1" {{.*}}"-enable-tlsdesc" // NODESC-NOT: "-enable-tlsdesc" diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index 4442b80861b61a..1877550f8c40bb 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -431,6 +431,20 @@ enum TOF { /// See 'ELF Handling for Thread-Local Storage' for more details. /// SYMBOL_LABEL @TLSLDM MO_TLSLDM, + /// MO_TLSCALL - On a symbol operand this indicates that the immediate is + /// the index of the TLS descriptor function for the symbol. Used in both + /// the IA32 and x86-64 local dynamic TLS access model. + /// See 'RFC-TLSDESC-x86' for more details. + /// SYMBOL_LABEL @TLSCALL + MO_TLSCALL, + /// MO_TLSDESC - On a symbol operand this indicates that the immediate is + /// the index of the TLS descriptor argument for the symbol. When this + /// argument is passed to a call getting from index@TLSCALL, the function will + /// return the offset for the symbol. Used in both the IA32 and x86-64 local + /// dynamic TLS access model. + /// See 'RFC-TLSDESC-x86' for more details. + /// SYMBOL_LABEL @TLSDESC + MO_TLSDESC, /// MO_GOTTPOFF - On a symbol operand this indicates that the immediate is /// the offset of the GOT entry with the thread-pointer offset for the /// symbol. Used in the x86-64 initial exec TLS access model. 
diff --git a/llvm/lib/Target/X86/X86AsmPrinter.cpp b/llvm/lib/Target/X86/X86AsmPrinter.cpp index 3395a13545e454..d8e111db1cec42 100644 --- a/llvm/lib/Target/X86/X86AsmPrinter.cpp +++ b/llvm/lib/Target/X86/X86AsmPrinter.cpp @@ -271,6 +271,8 @@ void X86AsmPrinter::PrintSymbolOperand(const MachineOperand &MO, case X86II::MO_TLSGD: O << "@TLSGD"; break; case X86II::MO_TLSLD: O << "@TLSLD"; break; case X86II::MO_TLSLDM: O << "@TLSLDM"; break; + case X86II::MO_TLSDESC: O << "@TLSDESC"; break; + case X86II::MO_TLSCALL: O << "@TLSCALL"; break; case X86II::MO_GOTTPOFF: O << "@GOTTPOFF"; break; case X86II::MO_INDNTPOFF: O << "@INDNTPOFF"; break; case X86II::MO_TPOFF: O << "@TPOFF"; break; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index a86f13135173b0..88314bcf510e9a 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -18515,17 +18515,17 @@ X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { return LowerGlobalOrExternal(Op, DAG, /*ForCall=*/false); } -static SDValue -GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, - SDValue *InGlue, const EVT PtrVT, unsigned ReturnReg, - unsigned char OperandFlags, bool LocalDynamic = false) { +static SDValue GetTLSADDR(SelectionDAG &DAG, SDValue Chain, + GlobalAddressSDNode *GA, SDValue *InGlue, + const EVT PtrVT, unsigned ReturnReg, + unsigned char OperandFlags, bool UseTLSDESC = false, + bool LocalDynamic = false) { MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); SDLoc dl(GA); - SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, - GA->getValueType(0), - GA->getOffset(), - OperandFlags); + SDValue TGA = DAG.getTargetGlobalAddress( + GA->getGlobal(), dl, GA->getValueType(0), GA->getOffset(), + UseTLSDESC ? X86II::MO_TLSDESC : OperandFlags); X86ISD::NodeType CallType = LocalDynamic ? 
X86ISD::TLSBASEADDR : X86ISD::TLSADDR; @@ -18543,13 +18543,27 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, MFI.setHasCalls(true); SDValue Glue = Chain.getValue(1); - return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Glue); + SDValue Ret = DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Glue); + + if (!UseTLSDESC) + return Ret; + + const X86Subtarget &Subtarget = DAG.getSubtarget<X86Subtarget>(); + MVT VT = Subtarget.isTarget64BitLP64() ? MVT::i64 : MVT::i32; + unsigned Seg = Subtarget.is64Bit() ? X86AS::FS : X86AS::GS; + + Value *Ptr = Constant::getNullValue(PointerType::get(*DAG.getContext(), Seg)); + SDValue Offset = + DAG.getLoad(VT, dl, DAG.getEntryNode(), DAG.getIntPtrConstant(0, dl), + MachinePointerInfo(Ptr)); + return DAG.getNode(ISD::ADD, dl, VT, Ret, Offset); } // Lower ISD::GlobalTLSAddress using the "general dynamic" model, 32 bit -static SDValue -LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG, - const EVT PtrVT) { +static SDValue LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, + SelectionDAG &DAG, + const EVT PtrVT, + bool UseTLSDESC) { SDValue InGlue; SDLoc dl(GA); // ? 
function entry point might be better SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX, @@ -18557,23 +18571,26 @@ LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG, SDLoc(), PtrVT), InGlue); InGlue = Chain.getValue(1); - return GetTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX, X86II::MO_TLSGD); + return GetTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX, X86II::MO_TLSGD, + UseTLSDESC); } // Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit LP64 -static SDValue -LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG, - const EVT PtrVT) { - return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, - X86::RAX, X86II::MO_TLSGD); +static SDValue LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, + SelectionDAG &DAG, + const EVT PtrVT, + bool UseTLSDESC) { + return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, X86::RAX, + X86II::MO_TLSGD, UseTLSDESC); } // Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit ILP32 -static SDValue -LowerToTLSGeneralDynamicModelX32(GlobalAddressSDNode *GA, SelectionDAG &DAG, - const EVT PtrVT) { - return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, - X86::EAX, X86II::MO_TLSGD); +static SDValue LowerToTLSGeneralDynamicModelX32(GlobalAddressSDNode *GA, + SelectionDAG &DAG, + const EVT PtrVT, + bool UseTLSDESC) { + return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, X86::EAX, + X86II::MO_TLSGD, UseTLSDESC); } static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA, @@ -18590,14 +18607,16 @@ static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA, if (Is64Bit) { unsigned ReturnReg = Is64BitLP64 ? 
X86::RAX : X86::EAX; Base = GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, ReturnReg, - X86II::MO_TLSLD, /*LocalDynamic=*/true); + X86II::MO_TLSLD, /*UseTLSDESC=*/false, + /*LocalDynamic=*/true); } else { SDValue InGlue; SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX, DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), InGlue); InGlue = Chain.getValue(1); Base = GetTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX, - X86II::MO_TLSLDM, /*LocalDynamic=*/true); + X86II::MO_TLSLDM, /*UseTLSDESC=*/false, + /*LocalDynamic=*/true); } // Note: the CleanupLocalDynamicTLSPass will remove redundant computations @@ -18684,21 +18703,26 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { if (Subtarget.isTargetELF()) { TLSModel::Model model = DAG.getTarget().getTLSModel(GV); + bool UseTLSDESC = DAG.getTarget().useTLSDESC(); switch (model) { - case TLSModel::GeneralDynamic: - if (Subtarget.is64Bit()) { - if (Subtarget.isTarget64BitLP64()) - return LowerToTLSGeneralDynamicModel64(GA, DAG, PtrVT); - return LowerToTLSGeneralDynamicModelX32(GA, DAG, PtrVT); - } - return LowerToTLSGeneralDynamicModel32(GA, DAG, PtrVT); - case TLSModel::LocalDynamic: + case TLSModel::LocalDynamic: + if (!UseTLSDESC) return LowerToTLSLocalDynamicModel(GA, DAG, PtrVT, Subtarget.is64Bit(), Subtarget.isTarget64BitLP64()); - case TLSModel::InitialExec: - case TLSModel::LocalExec: - return LowerToTLSExecModel(GA, DAG, PtrVT, model, Subtarget.is64Bit(), - PositionIndependent); + [[fallthrough]]; + case TLSModel::GeneralDynamic: + if (Subtarget.is64Bit()) { + if (Subtarget.isTarget64BitLP64()) { + // auto PtrVT = getPointerTy(DAG.getDataLayout(), X86AS::FS); + return LowerToTLSGeneralDynamicModel64(GA, DAG, PtrVT, UseTLSDESC); + } + return LowerToTLSGeneralDynamicModelX32(GA, DAG, PtrVT, UseTLSDESC); + } + return LowerToTLSGeneralDynamicModel32(GA, DAG, PtrVT, UseTLSDESC); + case TLSModel::InitialExec: + case TLSModel::LocalExec: + return 
LowerToTLSExecModel(GA, DAG, PtrVT, model, Subtarget.is64Bit(), + PositionIndependent); } llvm_unreachable("Unknown TLS model."); } diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp index d3b7d97a83caf0..e447e17c2d7d09 100644 --- a/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -257,6 +257,12 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, case X86II::MO_TLSLDM: RefKind = MCSymbolRefExpr::VK_TLSLDM; break; + case X86II::MO_TLSDESC: + RefKind = MCSymbolRefExpr::VK_TLSDESC; + break; + case X86II::MO_TLSCALL: + RefKind = MCSymbolRefExpr::VK_TLSCALL; + break; case X86II::MO_GOTTPOFF: RefKind = MCSymbolRefExpr::VK_GOTTPOFF; break; @@ -524,13 +530,14 @@ void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering, bool Is64BitsLP64 = MI.getOpcode() == X86::TLS_addr64 || MI.getOpcode() == X86::TLS_base_addr64; MCContext &Ctx = OutStreamer->getContext(); + bool isTLSDESC = MI.getOperand(3).getTargetFlags() == X86II::MO_TLSDESC; MCSymbolRefExpr::VariantKind SRVK; switch (MI.getOpcode()) { case X86::TLS_addr32: case X86::TLS_addr64: case X86::TLS_addrX32: - SRVK = MCSymbolRefExpr::VK_TLSGD; + SRVK = isTLSDESC ? MCSymbolRefExpr::VK_TLSDESC : MCSymbolRefExpr::VK_TLSGD; break; case X86::TLS_base_addr32: SRVK = MCSymbolRefExpr::VK_TLSLDM; @@ -554,7 +561,26 @@ void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering, bool UseGot = MMI->getModule()->getRtLibUseGOT() && Ctx.getAsmInfo()->canRelaxRelocations(); - if (Is64Bits) { + if (isTLSDESC) { + const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create( + MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)), + MCSymbolRefExpr::VK_TLSCALL, Ctx); + EmitAndCountInstruction( + MCInstBuilder(Is64BitsLP64 ? X86::LEA64r : X86::LEA32r) + .addReg(Is64BitsLP64 ? X86::RAX : X86::EAX) + .addReg(Is64Bits ? 
X86::RIP : X86::EBX) + .addImm(1) + .addReg(0) + .addExpr(Sym) + .addReg(0)); + EmitAndCountInstruction( + MCInstBuilder(Is64Bits ? X86::CALL64m : X86::CALL32m) + .addReg(Is64BitsLP64 ? X86::RAX : X86::EAX) + .addImm(1) + .addReg(0) + .addExpr(Expr) + .addReg(0)); + } else if (Is64Bits) { bool NeedsPadding = SRVK == MCSymbolRefExpr::VK_TLSGD; if (NeedsPadding && Is64BitsLP64) EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX)); diff --git a/llvm/test/CodeGen/X86/tls-desc.ll b/llvm/test/CodeGen/X86/tls-desc.ll new file mode 100644 index 00000000000000..ed363df6f78a43 --- /dev/null +++ b/llvm/test/CodeGen/X86/tls-desc.ll @@ -0,0 +1,165 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc < %s -mtriple=i686-unknown-unknown --relocation-model=pic -enable-tlsdesc | FileCheck %s --check-prefix=X86 +; RUN: llc < %s -mtriple=x86_64-pc-linux-gnux32 --relocation-model=pic -enable-tlsdesc | FileCheck %s --check-prefix=X32 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown --relocation-model=pic -enable-tlsdesc | FileCheck %s --check-prefix=X64 + +@x = thread_local global i32 0, align 4 +@y = internal thread_local global i32 0, align 4 + +define ptr @f1() nounwind { +; X86-LABEL: f1: +; X86: # %bb.0: +; X86-NEXT: pushl %ebx +; X86-NEXT: calll .L0$pb +; X86-NEXT: .L0$pb: +; X86-NEXT: popl %ebx +; X86-NEXT: .Ltmp0: +; X86-NEXT: addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp0-.L0$pb), %ebx +; X86-NEXT: leal x@tlsdesc(%ebx), %eax +; X86-NEXT: calll *x@tlscall(%eax) +; X86-NEXT: addl %gs:0, %eax +; X86-NEXT: popl %ebx +; X86-NEXT: retl +; +; X32-LABEL: f1: +; X32: # %bb.0: +; X32-NEXT: pushq %rax +; X32-NEXT: leal x@tlsdesc(%rip), %eax +; X32-NEXT: callq *x@tlscall(%eax) +; X32-NEXT: # kill: def $eax killed $eax def $rax +; X32-NEXT: addl %fs:0, %eax +; X32-NEXT: popq %rcx +; X32-NEXT: retq +; +; X64-LABEL: f1: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: leaq x@tlsdesc(%rip), %rax +; X64-NEXT: callq *x@tlscall(%rax) 
+; X64-NEXT: addq %fs:0, %rax +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %1 = tail call ptr @llvm.threadlocal.address.p0(ptr @x) + ret ptr %1 +} + +define i32 @f2() nounwind { +; X86-LABEL: f2: +; X86: # %bb.0: +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %esi +; X86-NEXT: calll .L1$pb +; X86-NEXT: .L1$pb: +; X86-NEXT: popl %ebx +; X86-NEXT: .Ltmp1: +; X86-NEXT: addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp1-.L1$pb), %ebx +; X86-NEXT: movl %gs:0, %esi +; X86-NEXT: leal x@tlsdesc(%ebx), %eax +; X86-NEXT: calll *x@tlscall(%eax) +; X86-NEXT: movl (%eax,%esi), %eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %ebx +; X86-NEXT: retl +; +; X32-LABEL: f2: +; X32: # %bb.0: +; X32-NEXT: pushq %rbx +; X32-NEXT: movl %fs:0, %ebx +; X32-NEXT: leal x@tlsdesc(%rip), %eax +; X32-NEXT: callq *x@tlscall(%eax) +; X32-NEXT: movl (%eax,%ebx), %eax +; X32-NEXT: popq %rbx +; X32-NEXT: retq +; +; X64-LABEL: f2: +; X64: # %bb.0: +; X64-NEXT: pushq %rbx +; X64-NEXT: movq %fs:0, %rbx +; X64-NEXT: leaq x@tlsdesc(%rip), %rax +; X64-NEXT: callq *x@tlscall(%rax) +; X64-NEXT: movl (%rax,%rbx), %eax +; X64-NEXT: popq %rbx +; X64-NEXT: retq + %1 = tail call ptr @llvm.threadlocal.address.p0(ptr @x) + %2 = load i32, ptr %1 + ret i32 %2 +} + +define ptr @f3() nounwind { +; X86-LABEL: f3: +; X86: # %bb.0: +; X86-NEXT: pushl %ebx +; X86-NEXT: calll .L2$pb +; X86-NEXT: .L2$pb: +; X86-NEXT: popl %ebx +; X86-NEXT: .Ltmp2: +; X86-NEXT: addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp2-.L2$pb), %ebx +; X86-NEXT: leal x@tlsdesc(%ebx), %eax +; X86-NEXT: calll *x@tlscall(%eax) +; X86-NEXT: addl %gs:0, %eax +; X86-NEXT: popl %ebx +; X86-NEXT: retl +; +; X32-LABEL: f3: +; X32: # %bb.0: +; X32-NEXT: pushq %rax +; X32-NEXT: leal x@tlsdesc(%rip), %eax +; X32-NEXT: callq *x@tlscall(%eax) +; X32-NEXT: # kill: def $eax killed $eax def $rax +; X32-NEXT: addl %fs:0, %eax +; X32-NEXT: popq %rcx +; X32-NEXT: retq +; +; X64-LABEL: f3: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: leaq x@tlsdesc(%rip), %rax +; X64-NEXT: callq *x@tlscall(%rax) +; 
X64-NEXT: addq %fs:0, %rax +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %1 = tail call ptr @llvm.threadlocal.address.p0(ptr @x) + ret ptr %1 +} + +define i32 @f4() nounwind { +; X86-LABEL: f4: +; X86: # %bb.0: +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %esi +; X86-NEXT: calll .L3$pb +; X86-NEXT: .L3$pb: +; X86-NEXT: popl %ebx +; X86-NEXT: .Ltmp3: +; X86-NEXT: addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp3-.L3$pb), %ebx +; X86-NEXT: movl %gs:0, %esi +; X86-NEXT: leal x@tlsdesc(%ebx), %eax +; X86-NEXT: calll *x@tlscall(%eax) +; X86-NEXT: movl (%eax,%esi), %eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %ebx +; X86-NEXT: retl +; +; X32-LABEL: f4: +; X32: # %bb.0: +; X32-NEXT: pushq %rbx +; X32-NEXT: movl %fs:0, %ebx +; X32-NEXT: leal x@tlsdesc(%rip), %eax +; X32-NEXT: callq *x@tlscall(%eax) +; X32-NEXT: movl (%eax,%ebx), %eax +; X32-NEXT: popq %rbx +; X32-NEXT: retq +; +; X64-LABEL: f4: +; X64: # %bb.0: +; X64-NEXT: pushq %rbx +; X64-NEXT: movq %fs:0, %rbx +; X64-NEXT: leaq x@tlsdesc(%rip), %rax +; X64-NEXT: callq *x@tlscall(%rax) +; X64-NEXT: movl (%rax,%rbx), %eax +; X64-NEXT: popq %rbx +; X64-NEXT: retq + %1 = tail call ptr @llvm.threadlocal.address.p0(ptr @x) + %2 = load i32, ptr %1 + ret i32 %2 +} >From 6eb75d6c99c926c055d809207dea918dd3ab4bf5 Mon Sep 17 00:00:00 2001 From: Phoebe Wang <phoebe.w...@intel.com> Date: Wed, 28 Feb 2024 21:22:00 +0800 Subject: [PATCH 2/8] Address review comments --- llvm/lib/Target/X86/X86ISelLowering.cpp | 16 +++--- llvm/test/CodeGen/X86/tls-desc.ll | 71 ++++++++++++++++++++++--- 2 files changed, 72 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 88314bcf510e9a..1feaad80c8e35a 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -18515,7 +18515,7 @@ X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { return LowerGlobalOrExternal(Op, DAG, /*ForCall=*/false); } -static SDValue 
GetTLSADDR(SelectionDAG &DAG, SDValue Chain, +static SDValue getTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, SDValue *InGlue, const EVT PtrVT, unsigned ReturnReg, unsigned char OperandFlags, bool UseTLSDESC = false, @@ -18571,7 +18571,7 @@ static SDValue LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SDLoc(), PtrVT), InGlue); InGlue = Chain.getValue(1); - return GetTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX, X86II::MO_TLSGD, + return getTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX, X86II::MO_TLSGD, UseTLSDESC); } @@ -18580,7 +18580,7 @@ static SDValue LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG, const EVT PtrVT, bool UseTLSDESC) { - return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, X86::RAX, + return getTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, X86::RAX, X86II::MO_TLSGD, UseTLSDESC); } @@ -18589,7 +18589,7 @@ static SDValue LowerToTLSGeneralDynamicModelX32(GlobalAddressSDNode *GA, SelectionDAG &DAG, const EVT PtrVT, bool UseTLSDESC) { - return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, X86::EAX, + return getTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, X86::EAX, X86II::MO_TLSGD, UseTLSDESC); } @@ -18606,7 +18606,7 @@ static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA, SDValue Base; if (Is64Bit) { unsigned ReturnReg = Is64BitLP64 ? 
X86::RAX : X86::EAX; - Base = GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, ReturnReg, + Base = getTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, ReturnReg, X86II::MO_TLSLD, /*UseTLSDESC=*/false, /*LocalDynamic=*/true); } else { @@ -18614,7 +18614,7 @@ static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA, SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX, DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), InGlue); InGlue = Chain.getValue(1); - Base = GetTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX, + Base = getTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX, X86II::MO_TLSLDM, /*UseTLSDESC=*/false, /*LocalDynamic=*/true); } @@ -18712,10 +18712,8 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { [[fallthrough]]; case TLSModel::GeneralDynamic: if (Subtarget.is64Bit()) { - if (Subtarget.isTarget64BitLP64()) { - // auto PtrVT = getPointerTy(DAG.getDataLayout(), X86AS::FS); + if (Subtarget.isTarget64BitLP64()) return LowerToTLSGeneralDynamicModel64(GA, DAG, PtrVT, UseTLSDESC); - } return LowerToTLSGeneralDynamicModelX32(GA, DAG, PtrVT, UseTLSDESC); } return LowerToTLSGeneralDynamicModel32(GA, DAG, PtrVT, UseTLSDESC); diff --git a/llvm/test/CodeGen/X86/tls-desc.ll b/llvm/test/CodeGen/X86/tls-desc.ll index ed363df6f78a43..7df1a5a09ce1ee 100644 --- a/llvm/test/CodeGen/X86/tls-desc.ll +++ b/llvm/test/CodeGen/X86/tls-desc.ll @@ -1,46 +1,105 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -; RUN: llc < %s -mtriple=i686-unknown-unknown --relocation-model=pic -enable-tlsdesc | FileCheck %s --check-prefix=X86 +; RUN: llc < %s -mtriple=i686 --relocation-model=pic -enable-tlsdesc | FileCheck %s --check-prefix=X86 ; RUN: llc < %s -mtriple=x86_64-pc-linux-gnux32 --relocation-model=pic -enable-tlsdesc | FileCheck %s --check-prefix=X32 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown --relocation-model=pic -enable-tlsdesc | FileCheck %s 
--check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64 --relocation-model=pic -enable-tlsdesc | FileCheck %s --check-prefix=X64 @x = thread_local global i32 0, align 4 @y = internal thread_local global i32 0, align 4 +;; FIXME: GCC's x86-64 port assumes that FLAGS_REG and RAX are changed while all other registers are preserved. define ptr @f1() nounwind { ; X86-LABEL: f1: ; X86: # %bb.0: +; X86-NEXT: pushl %ebp ; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $12, %esp ; X86-NEXT: calll .L0$pb ; X86-NEXT: .L0$pb: ; X86-NEXT: popl %ebx ; X86-NEXT: .Ltmp0: ; X86-NEXT: addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp0-.L0$pb), %ebx +; X86-NEXT: #APP +; X86-NEXT: #NO_APP +; X86-NEXT: movl %edx, (%esp) # 4-byte Spill +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: leal x@tlsdesc(%ebx), %eax ; X86-NEXT: calll *x@tlscall(%eax) ; X86-NEXT: addl %gs:0, %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: movl (%esp), %ebx # 4-byte Reload +; X86-NEXT: #APP +; X86-NEXT: #NO_APP +; X86-NEXT: addl $12, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp ; X86-NEXT: retl ; ; X32-LABEL: f1: ; X32: # %bb.0: +; X32-NEXT: pushq %rbp +; X32-NEXT: pushq %r15 +; X32-NEXT: pushq %r14 +; X32-NEXT: pushq %r13 +; X32-NEXT: pushq %r12 +; X32-NEXT: pushq %rbx ; X32-NEXT: pushq %rax +; X32-NEXT: #APP +; X32-NEXT: #NO_APP ; X32-NEXT: leal x@tlsdesc(%rip), %eax ; X32-NEXT: callq *x@tlscall(%eax) ; X32-NEXT: # kill: def $eax killed $eax def $rax ; X32-NEXT: addl %fs:0, %eax -; X32-NEXT: popq %rcx +; X32-NEXT: #APP +; X32-NEXT: #NO_APP +; X32-NEXT: addl $8, %esp +; X32-NEXT: popq %rbx +; X32-NEXT: popq %r12 +; X32-NEXT: popq %r13 +; X32-NEXT: popq %r14 +; X32-NEXT: popq %r15 +; X32-NEXT: popq %rbp ; X32-NEXT: retq ; ; X64-LABEL: f1: ; X64: # 
%bb.0: +; X64-NEXT: pushq %rbp +; X64-NEXT: pushq %r15 +; X64-NEXT: pushq %r14 +; X64-NEXT: pushq %r13 +; X64-NEXT: pushq %r12 +; X64-NEXT: pushq %rbx ; X64-NEXT: pushq %rax +; X64-NEXT: #APP +; X64-NEXT: #NO_APP ; X64-NEXT: leaq x@tlsdesc(%rip), %rax ; X64-NEXT: callq *x@tlscall(%rax) ; X64-NEXT: addq %fs:0, %rax -; X64-NEXT: popq %rcx +; X64-NEXT: #APP +; X64-NEXT: #NO_APP +; X64-NEXT: addq $8, %rsp +; X64-NEXT: popq %rbx +; X64-NEXT: popq %r12 +; X64-NEXT: popq %r13 +; X64-NEXT: popq %r14 +; X64-NEXT: popq %r15 +; X64-NEXT: popq %rbp ; X64-NEXT: retq - %1 = tail call ptr @llvm.threadlocal.address.p0(ptr @x) - ret ptr %1 + %a = call { i32, i32, i32, i32, i32, i32 } asm sideeffect "", "=r,=r,=r,=r,=r,=r,~{dirflag},~{fpsr},~{flags}"() + %b = call ptr @llvm.threadlocal.address.p0(ptr @x) + %a.0 = extractvalue { i32, i32, i32, i32, i32, i32 } %a, 0 + %a.1 = extractvalue { i32, i32, i32, i32, i32, i32 } %a, 1 + %a.2 = extractvalue { i32, i32, i32, i32, i32, i32 } %a, 2 + %a.3 = extractvalue { i32, i32, i32, i32, i32, i32 } %a, 3 + %a.4 = extractvalue { i32, i32, i32, i32, i32, i32 } %a, 4 + %a.5 = extractvalue { i32, i32, i32, i32, i32, i32 } %a, 5 + call void asm sideeffect "", "r,r,r,r,r,r,~{dirflag},~{fpsr},~{flags}"(i32 %a.0, i32 %a.1, i32 %a.2, i32 %a.3, i32 %a.4, i32 %a.5) + ret ptr %b } define i32 @f2() nounwind { >From 10ea12723c825affa71455d2d921ea19ed4d605e Mon Sep 17 00:00:00 2001 From: Phoebe Wang <phoebe.w...@intel.com> Date: Thu, 29 Feb 2024 15:51:54 +0800 Subject: [PATCH 3/8] Define a new node for TLSDESC --- .../lib/Target/X86/MCTargetDesc/X86BaseInfo.h | 14 ---- llvm/lib/Target/X86/X86AsmPrinter.cpp | 2 - llvm/lib/Target/X86/X86ISelLowering.cpp | 11 ++- llvm/lib/Target/X86/X86ISelLowering.h | 4 + llvm/lib/Target/X86/X86InstrCompiler.td | 10 +++ llvm/lib/Target/X86/X86InstrFragments.td | 3 + llvm/lib/Target/X86/X86MCInstLower.cpp | 23 +++-- llvm/test/CodeGen/X86/tls-desc.ll | 83 ++++++------------- 8 files changed, 60 insertions(+), 90 deletions(-) 
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index 1877550f8c40bb..4442b80861b61a 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -431,20 +431,6 @@ enum TOF { /// See 'ELF Handling for Thread-Local Storage' for more details. /// SYMBOL_LABEL @TLSLDM MO_TLSLDM, - /// MO_TLSCALL - On a symbol operand this indicates that the immediate is - /// the index of the TLS descriptor function for the symbol. Used in both - /// the IA32 and x86-64 local dynamic TLS access model. - /// See 'RFC-TLSDESC-x86' for more details. - /// SYMBOL_LABEL @TLSCALL - MO_TLSCALL, - /// MO_TLSDESC - On a symbol operand this indicates that the immediate is - /// the index of the TLS descriptor argument for the symbol. When this - /// argument is passed to a call getting from index@TLSCALL, the function will - /// return the offset for the symbol. Used in both the IA32 and x86-64 local - /// dynamic TLS access model. - /// See 'RFC-TLSDESC-x86' for more details. - /// SYMBOL_LABEL @TLSDESC - MO_TLSDESC, /// MO_GOTTPOFF - On a symbol operand this indicates that the immediate is /// the offset of the GOT entry with the thread-pointer offset for the /// symbol. Used in the x86-64 initial exec TLS access model. 
diff --git a/llvm/lib/Target/X86/X86AsmPrinter.cpp b/llvm/lib/Target/X86/X86AsmPrinter.cpp index d8e111db1cec42..3395a13545e454 100644 --- a/llvm/lib/Target/X86/X86AsmPrinter.cpp +++ b/llvm/lib/Target/X86/X86AsmPrinter.cpp @@ -271,8 +271,6 @@ void X86AsmPrinter::PrintSymbolOperand(const MachineOperand &MO, case X86II::MO_TLSGD: O << "@TLSGD"; break; case X86II::MO_TLSLD: O << "@TLSLD"; break; case X86II::MO_TLSLDM: O << "@TLSLDM"; break; - case X86II::MO_TLSDESC: O << "@TLSDESC"; break; - case X86II::MO_TLSCALL: O << "@TLSCALL"; break; case X86II::MO_GOTTPOFF: O << "@GOTTPOFF"; break; case X86II::MO_INDNTPOFF: O << "@INDNTPOFF"; break; case X86II::MO_TPOFF: O << "@TPOFF"; break; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 1feaad80c8e35a..437c1b432dae4d 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -18524,11 +18524,11 @@ static SDValue getTLSADDR(SelectionDAG &DAG, SDValue Chain, SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); SDLoc dl(GA); SDValue TGA = DAG.getTargetGlobalAddress( - GA->getGlobal(), dl, GA->getValueType(0), GA->getOffset(), - UseTLSDESC ? X86II::MO_TLSDESC : OperandFlags); + GA->getGlobal(), dl, GA->getValueType(0), GA->getOffset(), OperandFlags); - X86ISD::NodeType CallType = LocalDynamic ? X86ISD::TLSBASEADDR - : X86ISD::TLSADDR; + X86ISD::NodeType CallType = UseTLSDESC ? X86ISD::TLSDESC + : LocalDynamic ? 
X86ISD::TLSBASEADDR + : X86ISD::TLSADDR; if (InGlue) { SDValue Ops[] = { Chain, TGA, *InGlue }; @@ -33338,6 +33338,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(TLSADDR) NODE_NAME_CASE(TLSBASEADDR) NODE_NAME_CASE(TLSCALL) + NODE_NAME_CASE(TLSDESC) NODE_NAME_CASE(EH_SJLJ_SETJMP) NODE_NAME_CASE(EH_SJLJ_LONGJMP) NODE_NAME_CASE(EH_SJLJ_SETUP_DISPATCH) @@ -36118,6 +36119,8 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, case X86::TLS_base_addr32: case X86::TLS_base_addr64: case X86::TLS_base_addrX32: + case X86::TLS_desc32: + case X86::TLS_desc64: return EmitLoweredTLSAddr(MI, BB); case X86::INDIRECT_THUNK_CALL32: case X86::INDIRECT_THUNK_CALL64: diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index f93c54781846bf..e0e39a6e7c905d 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -295,6 +295,10 @@ namespace llvm { // thunk at the address from an earlier relocation. TLSCALL, + // Thread Local Storage. A descriptor containing pointer to + // code and to argument to get the TLS offset for the symbol. + TLSDESC, + // Exception Handling helpers. EH_RETURN, diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index 9f1712274bc304..04cffe43c9eb63 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -507,6 +507,16 @@ def TLS_base_addrX32 : I<0, Pseudo, (outs), (ins i32mem:$sym), Requires<[In64BitMode, NotLP64]>; } +// TLSDESC only clobbers EAX and EFLAGS. ESP is marked as a use to prevent +// stack-pointer assignments that appear immediately before calls from +// potentially appearing dead. 
+let Defs = [EAX, EFLAGS], usesCustomInserter = 1, Uses = [RSP, SSP] in { + def TLS_desc32 : I<0, Pseudo, (outs), (ins i32mem:$sym), + "# TLS_desc32", [(X86tlsdesc tls32addr:$sym)]>; + def TLS_desc64 : I<0, Pseudo, (outs), (ins i64mem:$sym), + "# TLS_desc64", [(X86tlsdesc tls64addr:$sym)]>; +} + // Darwin TLS Support // For i386, the address of the thunk is passed on the stack, on return the // address of the variable is in %eax. %ecx is trashed during the function diff --git a/llvm/lib/Target/X86/X86InstrFragments.td b/llvm/lib/Target/X86/X86InstrFragments.td index adf527d72f5b43..f14c7200af968a 100644 --- a/llvm/lib/Target/X86/X86InstrFragments.td +++ b/llvm/lib/Target/X86/X86InstrFragments.td @@ -223,6 +223,9 @@ def X86tlsaddr : SDNode<"X86ISD::TLSADDR", SDT_X86TLSADDR, def X86tlsbaseaddr : SDNode<"X86ISD::TLSBASEADDR", SDT_X86TLSBASEADDR, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; +def X86tlsdesc : SDNode<"X86ISD::TLSDESC", SDT_X86TLSADDR, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; + def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET, [SDNPHasChain]>; diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp index e447e17c2d7d09..e0585e697d4d42 100644 --- a/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -257,12 +257,6 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, case X86II::MO_TLSLDM: RefKind = MCSymbolRefExpr::VK_TLSLDM; break; - case X86II::MO_TLSDESC: - RefKind = MCSymbolRefExpr::VK_TLSDESC; - break; - case X86II::MO_TLSCALL: - RefKind = MCSymbolRefExpr::VK_TLSCALL; - break; case X86II::MO_GOTTPOFF: RefKind = MCSymbolRefExpr::VK_GOTTPOFF; break; @@ -525,19 +519,18 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering, const MachineInstr &MI) { NoAutoPaddingScope NoPadScope(*OutStreamer); - bool Is64Bits = MI.getOpcode() != X86::TLS_addr32 && - MI.getOpcode() 
!= X86::TLS_base_addr32; + bool Is64Bits = getSubtarget().is64Bit(); bool Is64BitsLP64 = MI.getOpcode() == X86::TLS_addr64 || - MI.getOpcode() == X86::TLS_base_addr64; + MI.getOpcode() == X86::TLS_base_addr64 || + MI.getOpcode() == X86::TLS_desc64; MCContext &Ctx = OutStreamer->getContext(); - bool isTLSDESC = MI.getOperand(3).getTargetFlags() == X86II::MO_TLSDESC; MCSymbolRefExpr::VariantKind SRVK; switch (MI.getOpcode()) { case X86::TLS_addr32: case X86::TLS_addr64: case X86::TLS_addrX32: - SRVK = isTLSDESC ? MCSymbolRefExpr::VK_TLSDESC : MCSymbolRefExpr::VK_TLSGD; + SRVK = MCSymbolRefExpr::VK_TLSGD; break; case X86::TLS_base_addr32: SRVK = MCSymbolRefExpr::VK_TLSLDM; @@ -546,6 +539,10 @@ void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering, case X86::TLS_base_addrX32: SRVK = MCSymbolRefExpr::VK_TLSLD; break; + case X86::TLS_desc32: + case X86::TLS_desc64: + SRVK = MCSymbolRefExpr::VK_TLSDESC; + break; default: llvm_unreachable("unexpected opcode"); } @@ -561,7 +558,7 @@ void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering, bool UseGot = MMI->getModule()->getRtLibUseGOT() && Ctx.getAsmInfo()->canRelaxRelocations(); - if (isTLSDESC) { + if (SRVK == MCSymbolRefExpr::VK_TLSDESC) { const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create( MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)), MCSymbolRefExpr::VK_TLSCALL, Ctx); @@ -2190,6 +2187,8 @@ void X86AsmPrinter::emitInstruction(const MachineInstr *MI) { case X86::TLS_base_addr32: case X86::TLS_base_addr64: case X86::TLS_base_addrX32: + case X86::TLS_desc32: + case X86::TLS_desc64: return LowerTlsAddr(MCInstLowering, *MI); case X86::MOVPC32r: { diff --git a/llvm/test/CodeGen/X86/tls-desc.ll b/llvm/test/CodeGen/X86/tls-desc.ll index 7df1a5a09ce1ee..303acbf5aa96f4 100644 --- a/llvm/test/CodeGen/X86/tls-desc.ll +++ b/llvm/test/CodeGen/X86/tls-desc.ll @@ -6,7 +6,6 @@ @x = thread_local global i32 0, align 4 @y = internal thread_local global i32 0, align 4 -;; FIXME: GCC's x86-64 port assumes that 
FLAGS_REG and RAX are changed while all other registers are preserved. define ptr @f1() nounwind { ; X86-LABEL: f1: ; X86: # %bb.0: @@ -14,7 +13,7 @@ define ptr @f1() nounwind { ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: subl $12, %esp +; X86-NEXT: pushl %eax ; X86-NEXT: calll .L0$pb ; X86-NEXT: .L0$pb: ; X86-NEXT: popl %ebx @@ -22,18 +21,14 @@ define ptr @f1() nounwind { ; X86-NEXT: addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp0-.L0$pb), %ebx ; X86-NEXT: #APP ; X86-NEXT: #NO_APP -; X86-NEXT: movl %edx, (%esp) # 4-byte Spill -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NEXT: leal x@tlsdesc(%ebx), %eax ; X86-NEXT: calll *x@tlscall(%eax) ; X86-NEXT: addl %gs:0, %eax -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NEXT: movl (%esp), %ebx # 4-byte Reload ; X86-NEXT: #APP ; X86-NEXT: #NO_APP -; X86-NEXT: addl $12, %esp +; X86-NEXT: addl $4, %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx @@ -42,12 +37,6 @@ define ptr @f1() nounwind { ; ; X32-LABEL: f1: ; X32: # %bb.0: -; X32-NEXT: pushq %rbp -; X32-NEXT: pushq %r15 -; X32-NEXT: pushq %r14 -; X32-NEXT: pushq %r13 -; X32-NEXT: pushq %r12 -; X32-NEXT: pushq %rbx ; X32-NEXT: pushq %rax ; X32-NEXT: #APP ; X32-NEXT: #NO_APP @@ -57,23 +46,11 @@ define ptr @f1() nounwind { ; X32-NEXT: addl %fs:0, %eax ; X32-NEXT: #APP ; X32-NEXT: #NO_APP -; X32-NEXT: addl $8, %esp -; X32-NEXT: popq %rbx -; X32-NEXT: popq %r12 -; X32-NEXT: popq %r13 -; X32-NEXT: popq %r14 -; X32-NEXT: popq %r15 -; X32-NEXT: popq %rbp +; X32-NEXT: popq %rcx ; X32-NEXT: retq ; ; X64-LABEL: f1: ; X64: # %bb.0: -; X64-NEXT: pushq %rbp -; X64-NEXT: pushq %r15 -; X64-NEXT: pushq %r14 -; X64-NEXT: pushq %r13 -; X64-NEXT: pushq %r12 -; X64-NEXT: pushq %rbx ; X64-NEXT: pushq %rax ; X64-NEXT: #APP ; 
X64-NEXT: #NO_APP @@ -82,13 +59,7 @@ define ptr @f1() nounwind { ; X64-NEXT: addq %fs:0, %rax ; X64-NEXT: #APP ; X64-NEXT: #NO_APP -; X64-NEXT: addq $8, %rsp -; X64-NEXT: popq %rbx -; X64-NEXT: popq %r12 -; X64-NEXT: popq %r13 -; X64-NEXT: popq %r14 -; X64-NEXT: popq %r15 -; X64-NEXT: popq %rbp +; X64-NEXT: popq %rcx ; X64-NEXT: retq %a = call { i32, i32, i32, i32, i32, i32 } asm sideeffect "", "=r,=r,=r,=r,=r,=r,~{dirflag},~{fpsr},~{flags}"() %b = call ptr @llvm.threadlocal.address.p0(ptr @x) @@ -106,38 +77,36 @@ define i32 @f2() nounwind { ; X86-LABEL: f2: ; X86: # %bb.0: ; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %esi ; X86-NEXT: calll .L1$pb ; X86-NEXT: .L1$pb: ; X86-NEXT: popl %ebx ; X86-NEXT: .Ltmp1: ; X86-NEXT: addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp1-.L1$pb), %ebx -; X86-NEXT: movl %gs:0, %esi +; X86-NEXT: movl %gs:0, %ecx ; X86-NEXT: leal x@tlsdesc(%ebx), %eax ; X86-NEXT: calll *x@tlscall(%eax) -; X86-NEXT: movl (%eax,%esi), %eax -; X86-NEXT: popl %esi +; X86-NEXT: movl (%eax,%ecx), %eax ; X86-NEXT: popl %ebx ; X86-NEXT: retl ; ; X32-LABEL: f2: ; X32: # %bb.0: -; X32-NEXT: pushq %rbx -; X32-NEXT: movl %fs:0, %ebx +; X32-NEXT: pushq %rax +; X32-NEXT: movl %fs:0, %ecx ; X32-NEXT: leal x@tlsdesc(%rip), %eax ; X32-NEXT: callq *x@tlscall(%eax) -; X32-NEXT: movl (%eax,%ebx), %eax -; X32-NEXT: popq %rbx +; X32-NEXT: movl (%eax,%ecx), %eax +; X32-NEXT: popq %rcx ; X32-NEXT: retq ; ; X64-LABEL: f2: ; X64: # %bb.0: -; X64-NEXT: pushq %rbx -; X64-NEXT: movq %fs:0, %rbx +; X64-NEXT: pushq %rax +; X64-NEXT: movq %fs:0, %rcx ; X64-NEXT: leaq x@tlsdesc(%rip), %rax ; X64-NEXT: callq *x@tlscall(%rax) -; X64-NEXT: movl (%rax,%rbx), %eax -; X64-NEXT: popq %rbx +; X64-NEXT: movl (%rax,%rcx), %eax +; X64-NEXT: popq %rcx ; X64-NEXT: retq %1 = tail call ptr @llvm.threadlocal.address.p0(ptr @x) %2 = load i32, ptr %1 @@ -185,38 +154,36 @@ define i32 @f4() nounwind { ; X86-LABEL: f4: ; X86: # %bb.0: ; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %esi ; X86-NEXT: calll .L3$pb ; X86-NEXT: 
.L3$pb: ; X86-NEXT: popl %ebx ; X86-NEXT: .Ltmp3: ; X86-NEXT: addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp3-.L3$pb), %ebx -; X86-NEXT: movl %gs:0, %esi +; X86-NEXT: movl %gs:0, %ecx ; X86-NEXT: leal x@tlsdesc(%ebx), %eax ; X86-NEXT: calll *x@tlscall(%eax) -; X86-NEXT: movl (%eax,%esi), %eax -; X86-NEXT: popl %esi +; X86-NEXT: movl (%eax,%ecx), %eax ; X86-NEXT: popl %ebx ; X86-NEXT: retl ; ; X32-LABEL: f4: ; X32: # %bb.0: -; X32-NEXT: pushq %rbx -; X32-NEXT: movl %fs:0, %ebx +; X32-NEXT: pushq %rax +; X32-NEXT: movl %fs:0, %ecx ; X32-NEXT: leal x@tlsdesc(%rip), %eax ; X32-NEXT: callq *x@tlscall(%eax) -; X32-NEXT: movl (%eax,%ebx), %eax -; X32-NEXT: popq %rbx +; X32-NEXT: movl (%eax,%ecx), %eax +; X32-NEXT: popq %rcx ; X32-NEXT: retq ; ; X64-LABEL: f4: ; X64: # %bb.0: -; X64-NEXT: pushq %rbx -; X64-NEXT: movq %fs:0, %rbx +; X64-NEXT: pushq %rax +; X64-NEXT: movq %fs:0, %rcx ; X64-NEXT: leaq x@tlsdesc(%rip), %rax ; X64-NEXT: callq *x@tlscall(%rax) -; X64-NEXT: movl (%rax,%rbx), %eax -; X64-NEXT: popq %rbx +; X64-NEXT: movl (%rax,%rcx), %eax +; X64-NEXT: popq %rcx ; X64-NEXT: retq %1 = tail call ptr @llvm.threadlocal.address.p0(ptr @x) %2 = load i32, ptr %1 >From 22f8bff277204fe8c37e7ea732e593b2ca5abdd7 Mon Sep 17 00:00:00 2001 From: Phoebe Wang <phoebe.w...@intel.com> Date: Mon, 4 Mar 2024 21:58:28 +0800 Subject: [PATCH 4/8] Add one more internal thread_local --- llvm/test/CodeGen/X86/tls-desc.ll | 45 ++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/llvm/test/CodeGen/X86/tls-desc.ll b/llvm/test/CodeGen/X86/tls-desc.ll index 303acbf5aa96f4..6fc4ffdccf6901 100644 --- a/llvm/test/CodeGen/X86/tls-desc.ll +++ b/llvm/test/CodeGen/X86/tls-desc.ll @@ -5,6 +5,7 @@ @x = thread_local global i32 0, align 4 @y = internal thread_local global i32 0, align 4 +@z = internal thread_local global i32 1, align 4 define ptr @f1() nounwind { ; X86-LABEL: f1: @@ -159,33 +160,47 @@ define i32 @f4() nounwind { ; X86-NEXT: popl %ebx ; X86-NEXT: .Ltmp3: ; 
X86-NEXT: addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp3-.L3$pb), %ebx -; X86-NEXT: movl %gs:0, %ecx -; X86-NEXT: leal x@tlsdesc(%ebx), %eax -; X86-NEXT: calll *x@tlscall(%eax) -; X86-NEXT: movl (%eax,%ecx), %eax +; X86-NEXT: leal y@tlsdesc(%ebx), %eax +; X86-NEXT: calll *y@tlscall(%eax) +; X86-NEXT: movl %gs:0, %edx +; X86-NEXT: movl (%eax,%edx), %ecx +; X86-NEXT: leal z@tlsdesc(%ebx), %eax +; X86-NEXT: calll *z@tlscall(%eax) +; X86-NEXT: addl (%eax,%edx), %ecx +; X86-NEXT: movl %ecx, %eax ; X86-NEXT: popl %ebx ; X86-NEXT: retl ; ; X32-LABEL: f4: ; X32: # %bb.0: ; X32-NEXT: pushq %rax -; X32-NEXT: movl %fs:0, %ecx -; X32-NEXT: leal x@tlsdesc(%rip), %eax -; X32-NEXT: callq *x@tlscall(%eax) -; X32-NEXT: movl (%eax,%ecx), %eax +; X32-NEXT: leal y@tlsdesc(%rip), %eax +; X32-NEXT: callq *y@tlscall(%eax) +; X32-NEXT: movl %fs:0, %edx +; X32-NEXT: movl (%eax,%edx), %ecx +; X32-NEXT: leal z@tlsdesc(%rip), %eax +; X32-NEXT: callq *z@tlscall(%eax) +; X32-NEXT: addl (%eax,%edx), %ecx +; X32-NEXT: movl %ecx, %eax ; X32-NEXT: popq %rcx ; X32-NEXT: retq ; ; X64-LABEL: f4: ; X64: # %bb.0: ; X64-NEXT: pushq %rax -; X64-NEXT: movq %fs:0, %rcx -; X64-NEXT: leaq x@tlsdesc(%rip), %rax -; X64-NEXT: callq *x@tlscall(%rax) -; X64-NEXT: movl (%rax,%rcx), %eax +; X64-NEXT: leaq y@tlsdesc(%rip), %rax +; X64-NEXT: callq *y@tlscall(%rax) +; X64-NEXT: # kill: def $rax killed $rax killed $eax +; X64-NEXT: movq %fs:0, %rdx +; X64-NEXT: movl (%rax,%rdx), %ecx +; X64-NEXT: leaq z@tlsdesc(%rip), %rax +; X64-NEXT: callq *z@tlscall(%rax) +; X64-NEXT: addl (%rax,%rdx), %ecx +; X64-NEXT: movl %ecx, %eax ; X64-NEXT: popq %rcx ; X64-NEXT: retq - %1 = tail call ptr @llvm.threadlocal.address.p0(ptr @x) - %2 = load i32, ptr %1 - ret i32 %2 + %1 = load i32, ptr @y, align 4 + %2 = load i32, ptr @z, align 4 + %3 = add nsw i32 %1, %2 + ret i32 %3 } >From c2b992002f9f9f19d2b4d6f78b9c35aedbbffab3 Mon Sep 17 00:00:00 2001 From: Phoebe Wang <phoebe.w...@intel.com> Date: Wed, 6 Mar 2024 18:32:23 +0800 Subject: [PATCH 5/8] 
Use _TLS_MODULE_BASE_ for local dynamic model --- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 16 +++++++++---- llvm/lib/Target/X86/X86ISelLowering.cpp | 28 +++++++++++++--------- llvm/test/CodeGen/X86/tls-desc.ll | 31 ++++++++++--------------- llvm/test/CodeGen/X86/tls-desc.s | 0 4 files changed, 40 insertions(+), 35 deletions(-) create mode 100644 llvm/test/CodeGen/X86/tls-desc.s diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index c8f80ced354538..1db37f83e5eccc 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -3088,13 +3088,19 @@ bool X86DAGToDAGISel::selectLEAAddr(SDValue N, bool X86DAGToDAGISel::selectTLSADDRAddr(SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { - assert(N.getOpcode() == ISD::TargetGlobalTLSAddress); - auto *GA = cast<GlobalAddressSDNode>(N); + assert(N.getOpcode() == ISD::TargetGlobalTLSAddress || + N.getOpcode() == ISD::TargetExternalSymbol); X86ISelAddressMode AM; - AM.GV = GA->getGlobal(); - AM.Disp += GA->getOffset(); - AM.SymbolFlags = GA->getTargetFlags(); + if (auto *GA = dyn_cast<GlobalAddressSDNode>(N)) { + AM.GV = GA->getGlobal(); + AM.Disp += GA->getOffset(); + AM.SymbolFlags = GA->getTargetFlags(); + } else { + auto *SA = cast<ExternalSymbolSDNode>(N); + AM.ES = SA->getSymbol(); + AM.SymbolFlags = SA->getTargetFlags(); + } if (Subtarget->is32Bit()) { AM.Scale = 1; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 437c1b432dae4d..9c5f24c9a31c37 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -18523,8 +18523,16 @@ static SDValue getTLSADDR(SelectionDAG &DAG, SDValue Chain, MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); SDLoc dl(GA); - SDValue TGA = DAG.getTargetGlobalAddress( - GA->getGlobal(), dl, 
GA->getValueType(0), GA->getOffset(), OperandFlags); + SDValue TGA; + if (LocalDynamic && UseTLSDESC) { + TGA = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT, OperandFlags); + auto UI = TGA->use_begin(); + if (UI != TGA->use_end()) + return SDValue(*UI->use_begin()->use_begin(), 0); + } else { + TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0), + GA->getOffset(), OperandFlags); + } X86ISD::NodeType CallType = UseTLSDESC ? X86ISD::TLSDESC : LocalDynamic ? X86ISD::TLSBASEADDR @@ -18595,7 +18603,8 @@ static SDValue LowerToTLSGeneralDynamicModelX32(GlobalAddressSDNode *GA, static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, const EVT PtrVT, - bool Is64Bit, bool Is64BitLP64) { + bool Is64Bit, bool Is64BitLP64, + bool UseTLSDESC) { SDLoc dl(GA); // Get the start address of the TLS block for this module. @@ -18607,16 +18616,14 @@ static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA, if (Is64Bit) { unsigned ReturnReg = Is64BitLP64 ? 
X86::RAX : X86::EAX; Base = getTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, ReturnReg, - X86II::MO_TLSLD, /*UseTLSDESC=*/false, - /*LocalDynamic=*/true); + X86II::MO_TLSLD, UseTLSDESC, /*LocalDynamic=*/true); } else { SDValue InGlue; SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX, DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), InGlue); InGlue = Chain.getValue(1); Base = getTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX, - X86II::MO_TLSLDM, /*UseTLSDESC=*/false, - /*LocalDynamic=*/true); + X86II::MO_TLSLDM, UseTLSDESC, /*LocalDynamic=*/true); } // Note: the CleanupLocalDynamicTLSPass will remove redundant computations @@ -18706,10 +18713,9 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { bool UseTLSDESC = DAG.getTarget().useTLSDESC(); switch (model) { case TLSModel::LocalDynamic: - if (!UseTLSDESC) - return LowerToTLSLocalDynamicModel(GA, DAG, PtrVT, Subtarget.is64Bit(), - Subtarget.isTarget64BitLP64()); - [[fallthrough]]; + return LowerToTLSLocalDynamicModel(GA, DAG, PtrVT, Subtarget.is64Bit(), + Subtarget.isTarget64BitLP64(), + UseTLSDESC); case TLSModel::GeneralDynamic: if (Subtarget.is64Bit()) { if (Subtarget.isTarget64BitLP64()) diff --git a/llvm/test/CodeGen/X86/tls-desc.ll b/llvm/test/CodeGen/X86/tls-desc.ll index 6fc4ffdccf6901..f4b9367ca32494 100644 --- a/llvm/test/CodeGen/X86/tls-desc.ll +++ b/llvm/test/CodeGen/X86/tls-desc.ll @@ -160,13 +160,11 @@ define i32 @f4() nounwind { ; X86-NEXT: popl %ebx ; X86-NEXT: .Ltmp3: ; X86-NEXT: addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp3-.L3$pb), %ebx -; X86-NEXT: leal y@tlsdesc(%ebx), %eax -; X86-NEXT: calll *y@tlscall(%eax) ; X86-NEXT: movl %gs:0, %edx -; X86-NEXT: movl (%eax,%edx), %ecx -; X86-NEXT: leal z@tlsdesc(%ebx), %eax -; X86-NEXT: calll *z@tlscall(%eax) -; X86-NEXT: addl (%eax,%edx), %ecx +; X86-NEXT: leal _TLS_MODULE_BASE_@tlsdesc(%ebx), %eax +; X86-NEXT: calll *_TLS_MODULE_BASE_@tlscall(%eax) +; X86-NEXT: movl y@DTPOFF(%eax,%edx), %ecx +; X86-NEXT: 
addl z@DTPOFF(%eax,%edx), %ecx ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: popl %ebx ; X86-NEXT: retl @@ -174,13 +172,11 @@ define i32 @f4() nounwind { ; X32-LABEL: f4: ; X32: # %bb.0: ; X32-NEXT: pushq %rax -; X32-NEXT: leal y@tlsdesc(%rip), %eax -; X32-NEXT: callq *y@tlscall(%eax) ; X32-NEXT: movl %fs:0, %edx -; X32-NEXT: movl (%eax,%edx), %ecx -; X32-NEXT: leal z@tlsdesc(%rip), %eax -; X32-NEXT: callq *z@tlscall(%eax) -; X32-NEXT: addl (%eax,%edx), %ecx +; X32-NEXT: leal _TLS_MODULE_BASE_@tlsdesc(%rip), %eax +; X32-NEXT: callq *_TLS_MODULE_BASE_@tlscall(%eax) +; X32-NEXT: movl y@DTPOFF(%eax,%edx), %ecx +; X32-NEXT: addl z@DTPOFF(%eax,%edx), %ecx ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: popq %rcx ; X32-NEXT: retq @@ -188,14 +184,11 @@ define i32 @f4() nounwind { ; X64-LABEL: f4: ; X64: # %bb.0: ; X64-NEXT: pushq %rax -; X64-NEXT: leaq y@tlsdesc(%rip), %rax -; X64-NEXT: callq *y@tlscall(%rax) -; X64-NEXT: # kill: def $rax killed $rax killed $eax ; X64-NEXT: movq %fs:0, %rdx -; X64-NEXT: movl (%rax,%rdx), %ecx -; X64-NEXT: leaq z@tlsdesc(%rip), %rax -; X64-NEXT: callq *z@tlscall(%rax) -; X64-NEXT: addl (%rax,%rdx), %ecx +; X64-NEXT: leaq _TLS_MODULE_BASE_@tlsdesc(%rip), %rax +; X64-NEXT: callq *_TLS_MODULE_BASE_@tlscall(%rax) +; X64-NEXT: movl y@DTPOFF(%rax,%rdx), %ecx +; X64-NEXT: addl z@DTPOFF(%rax,%rdx), %ecx ; X64-NEXT: movl %ecx, %eax ; X64-NEXT: popq %rcx ; X64-NEXT: retq diff --git a/llvm/test/CodeGen/X86/tls-desc.s b/llvm/test/CodeGen/X86/tls-desc.s new file mode 100644 index 00000000000000..e69de29bb2d1d6 >From 31cd21457a7c8b77d724a873a58fcf06fa3ef43d Mon Sep 17 00:00:00 2001 From: Phoebe Wang <phoebe.w...@intel.com> Date: Wed, 6 Mar 2024 18:51:37 +0800 Subject: [PATCH 6/8] Revert change to signatures of GetTLSADDR --- llvm/lib/Target/X86/X86ISelLowering.cpp | 83 +++++++++++-------------- llvm/test/CodeGen/X86/tls-desc.s | 0 2 files changed, 38 insertions(+), 45 deletions(-) delete mode 100644 llvm/test/CodeGen/X86/tls-desc.s diff --git 
a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 9c5f24c9a31c37..e4df010a5a6c6f 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -18515,15 +18515,15 @@ X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { return LowerGlobalOrExternal(Op, DAG, /*ForCall=*/false); } -static SDValue getTLSADDR(SelectionDAG &DAG, SDValue Chain, - GlobalAddressSDNode *GA, SDValue *InGlue, - const EVT PtrVT, unsigned ReturnReg, - unsigned char OperandFlags, bool UseTLSDESC = false, - bool LocalDynamic = false) { +static SDValue +GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, + SDValue *InGlue, const EVT PtrVT, unsigned ReturnReg, + unsigned char OperandFlags, bool LocalDynamic = false) { MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); SDLoc dl(GA); SDValue TGA; + bool UseTLSDESC = DAG.getTarget().useTLSDESC(); if (LocalDynamic && UseTLSDESC) { TGA = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT, OperandFlags); auto UI = TGA->use_begin(); @@ -18568,10 +18568,9 @@ static SDValue getTLSADDR(SelectionDAG &DAG, SDValue Chain, } // Lower ISD::GlobalTLSAddress using the "general dynamic" model, 32 bit -static SDValue LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, - SelectionDAG &DAG, - const EVT PtrVT, - bool UseTLSDESC) { +static SDValue +LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG, + const EVT PtrVT) { SDValue InGlue; SDLoc dl(GA); // ? 
function entry point might be better SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX, @@ -18579,32 +18578,28 @@ static SDValue LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SDLoc(), PtrVT), InGlue); InGlue = Chain.getValue(1); - return getTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX, X86II::MO_TLSGD, - UseTLSDESC); + return GetTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX, X86II::MO_TLSGD); } // Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit LP64 -static SDValue LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, - SelectionDAG &DAG, - const EVT PtrVT, - bool UseTLSDESC) { - return getTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, X86::RAX, - X86II::MO_TLSGD, UseTLSDESC); +static SDValue +LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG, + const EVT PtrVT) { + return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, + X86::RAX, X86II::MO_TLSGD); } // Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit ILP32 -static SDValue LowerToTLSGeneralDynamicModelX32(GlobalAddressSDNode *GA, - SelectionDAG &DAG, - const EVT PtrVT, - bool UseTLSDESC) { - return getTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, X86::EAX, - X86II::MO_TLSGD, UseTLSDESC); +static SDValue +LowerToTLSGeneralDynamicModelX32(GlobalAddressSDNode *GA, SelectionDAG &DAG, + const EVT PtrVT) { + return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, + X86::EAX, X86II::MO_TLSGD); } static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, const EVT PtrVT, - bool Is64Bit, bool Is64BitLP64, - bool UseTLSDESC) { + bool Is64Bit, bool Is64BitLP64) { SDLoc dl(GA); // Get the start address of the TLS block for this module. @@ -18615,15 +18610,15 @@ static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA, SDValue Base; if (Is64Bit) { unsigned ReturnReg = Is64BitLP64 ? 
X86::RAX : X86::EAX; - Base = getTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, ReturnReg, - X86II::MO_TLSLD, UseTLSDESC, /*LocalDynamic=*/true); + Base = GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, ReturnReg, + X86II::MO_TLSLD, /*LocalDynamic=*/true); } else { SDValue InGlue; SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX, DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), InGlue); InGlue = Chain.getValue(1); - Base = getTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX, - X86II::MO_TLSLDM, UseTLSDESC, /*LocalDynamic=*/true); + Base = GetTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX, + X86II::MO_TLSLDM, /*LocalDynamic=*/true); } // Note: the CleanupLocalDynamicTLSPass will remove redundant computations @@ -18710,23 +18705,21 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { if (Subtarget.isTargetELF()) { TLSModel::Model model = DAG.getTarget().getTLSModel(GV); - bool UseTLSDESC = DAG.getTarget().useTLSDESC(); switch (model) { - case TLSModel::LocalDynamic: - return LowerToTLSLocalDynamicModel(GA, DAG, PtrVT, Subtarget.is64Bit(), - Subtarget.isTarget64BitLP64(), - UseTLSDESC); - case TLSModel::GeneralDynamic: - if (Subtarget.is64Bit()) { - if (Subtarget.isTarget64BitLP64()) - return LowerToTLSGeneralDynamicModel64(GA, DAG, PtrVT, UseTLSDESC); - return LowerToTLSGeneralDynamicModelX32(GA, DAG, PtrVT, UseTLSDESC); - } - return LowerToTLSGeneralDynamicModel32(GA, DAG, PtrVT, UseTLSDESC); - case TLSModel::InitialExec: - case TLSModel::LocalExec: - return LowerToTLSExecModel(GA, DAG, PtrVT, model, Subtarget.is64Bit(), - PositionIndependent); + case TLSModel::GeneralDynamic: + if (Subtarget.is64Bit()) { + if (Subtarget.isTarget64BitLP64()) + return LowerToTLSGeneralDynamicModel64(GA, DAG, PtrVT); + return LowerToTLSGeneralDynamicModelX32(GA, DAG, PtrVT); + } + return LowerToTLSGeneralDynamicModel32(GA, DAG, PtrVT); + case TLSModel::LocalDynamic: + return LowerToTLSLocalDynamicModel(GA, DAG, PtrVT, 
Subtarget.is64Bit(), + Subtarget.isTarget64BitLP64()); + case TLSModel::InitialExec: + case TLSModel::LocalExec: + return LowerToTLSExecModel(GA, DAG, PtrVT, model, Subtarget.is64Bit(), + PositionIndependent); } llvm_unreachable("Unknown TLS model."); } diff --git a/llvm/test/CodeGen/X86/tls-desc.s b/llvm/test/CodeGen/X86/tls-desc.s deleted file mode 100644 index e69de29bb2d1d6..00000000000000 >From cf648db7e3fa64251e11e65f86dc999f8ff15c28 Mon Sep 17 00:00:00 2001 From: Phoebe Wang <phoebe.w...@intel.com> Date: Thu, 7 Mar 2024 11:50:26 +0800 Subject: [PATCH 7/8] Address review comments. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 6 ++-- llvm/lib/Target/X86/X86MCInstLower.cpp | 4 +-- llvm/test/CodeGen/X86/tls-desc.ll | 48 +++++++++++++++++++++++++ 3 files changed, 52 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index e4df010a5a6c6f..e05d04c7bcb1fa 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -18527,6 +18527,7 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, if (LocalDynamic && UseTLSDESC) { TGA = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT, OperandFlags); auto UI = TGA->use_begin(); + // Reuse existing GetTLSADDR node if we can find it. if (UI != TGA->use_end()) return SDValue(*UI->use_begin()->use_begin(), 0); } else { @@ -18557,14 +18558,13 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, return Ret; const X86Subtarget &Subtarget = DAG.getSubtarget<X86Subtarget>(); - MVT VT = Subtarget.isTarget64BitLP64() ? MVT::i64 : MVT::i32; unsigned Seg = Subtarget.is64Bit() ? 
X86AS::FS : X86AS::GS; Value *Ptr = Constant::getNullValue(PointerType::get(*DAG.getContext(), Seg)); SDValue Offset = - DAG.getLoad(VT, dl, DAG.getEntryNode(), DAG.getIntPtrConstant(0, dl), + DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), DAG.getIntPtrConstant(0, dl), MachinePointerInfo(Ptr)); - return DAG.getNode(ISD::ADD, dl, VT, Ret, Offset); + return DAG.getNode(ISD::ADD, dl, PtrVT, Ret, Offset); } // Lower ISD::GlobalTLSAddress using the "general dynamic" model, 32 bit diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp index e0585e697d4d42..b5f5fc57ac5f03 100644 --- a/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -520,9 +520,7 @@ void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering, const MachineInstr &MI) { NoAutoPaddingScope NoPadScope(*OutStreamer); bool Is64Bits = getSubtarget().is64Bit(); - bool Is64BitsLP64 = MI.getOpcode() == X86::TLS_addr64 || - MI.getOpcode() == X86::TLS_base_addr64 || - MI.getOpcode() == X86::TLS_desc64; + bool Is64BitsLP64 = getSubtarget().isTarget64BitLP64(); MCContext &Ctx = OutStreamer->getContext(); MCSymbolRefExpr::VariantKind SRVK; diff --git a/llvm/test/CodeGen/X86/tls-desc.ll b/llvm/test/CodeGen/X86/tls-desc.ll index f4b9367ca32494..ea9a29dede3481 100644 --- a/llvm/test/CodeGen/X86/tls-desc.ll +++ b/llvm/test/CodeGen/X86/tls-desc.ll @@ -6,6 +6,7 @@ @x = thread_local global i32 0, align 4 @y = internal thread_local global i32 0, align 4 @z = internal thread_local global i32 1, align 4 +@t = external hidden thread_local global i32, align 4 define ptr @f1() nounwind { ; X86-LABEL: f1: @@ -197,3 +198,50 @@ define i32 @f4() nounwind { %3 = add nsw i32 %1, %2 ret i32 %3 } + +define i32 @f5() nounwind { +; X86-LABEL: f5: +; X86: # %bb.0: +; X86-NEXT: pushl %ebx +; X86-NEXT: calll .L4$pb +; X86-NEXT: .L4$pb: +; X86-NEXT: popl %ebx +; X86-NEXT: .Ltmp4: +; X86-NEXT: addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp4-.L4$pb), %ebx +; X86-NEXT: movl %gs:0, %edx 
+; X86-NEXT: leal _TLS_MODULE_BASE_@tlsdesc(%ebx), %eax +; X86-NEXT: calll *_TLS_MODULE_BASE_@tlscall(%eax) +; X86-NEXT: movl z@DTPOFF(%eax,%edx), %ecx +; X86-NEXT: addl t@DTPOFF(%eax,%edx), %ecx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: popl %ebx +; X86-NEXT: retl +; +; X32-LABEL: f5: +; X32: # %bb.0: +; X32-NEXT: pushq %rax +; X32-NEXT: movl %fs:0, %edx +; X32-NEXT: leal _TLS_MODULE_BASE_@tlsdesc(%rip), %eax +; X32-NEXT: callq *_TLS_MODULE_BASE_@tlscall(%eax) +; X32-NEXT: movl z@DTPOFF(%eax,%edx), %ecx +; X32-NEXT: addl t@DTPOFF(%eax,%edx), %ecx +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: popq %rcx +; X32-NEXT: retq +; +; X64-LABEL: f5: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: movq %fs:0, %rdx +; X64-NEXT: leaq _TLS_MODULE_BASE_@tlsdesc(%rip), %rax +; X64-NEXT: callq *_TLS_MODULE_BASE_@tlscall(%rax) +; X64-NEXT: movl z@DTPOFF(%rax,%rdx), %ecx +; X64-NEXT: addl t@DTPOFF(%rax,%rdx), %ecx +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %1 = load i32, ptr @z, align 4 + %2 = load i32, ptr @t, align 4 + %3 = add nsw i32 %1, %2 + ret i32 %3 +} >From 05c749f23c4c5867f3bda41b45924cccbee0c825 Mon Sep 17 00:00:00 2001 From: Phoebe Wang <phoebe.w...@intel.com> Date: Fri, 15 Mar 2024 15:43:09 +0800 Subject: [PATCH 8/8] Replace f4 with f5 --- llvm/test/CodeGen/X86/tls-desc.ll | 52 ++----------------------------- 1 file changed, 2 insertions(+), 50 deletions(-) diff --git a/llvm/test/CodeGen/X86/tls-desc.ll b/llvm/test/CodeGen/X86/tls-desc.ll index ea9a29dede3481..c73986e69e7918 100644 --- a/llvm/test/CodeGen/X86/tls-desc.ll +++ b/llvm/test/CodeGen/X86/tls-desc.ll @@ -4,9 +4,8 @@ ; RUN: llc < %s -mtriple=x86_64 --relocation-model=pic -enable-tlsdesc | FileCheck %s --check-prefix=X64 @x = thread_local global i32 0, align 4 -@y = internal thread_local global i32 0, align 4 -@z = internal thread_local global i32 1, align 4 -@t = external hidden thread_local global i32, align 4 +@y = internal thread_local global i32 1, align 4 +@z = external 
hidden thread_local global i32, align 4 define ptr @f1() nounwind { ; X86-LABEL: f1: @@ -198,50 +197,3 @@ define i32 @f4() nounwind { %3 = add nsw i32 %1, %2 ret i32 %3 } - -define i32 @f5() nounwind { -; X86-LABEL: f5: -; X86: # %bb.0: -; X86-NEXT: pushl %ebx -; X86-NEXT: calll .L4$pb -; X86-NEXT: .L4$pb: -; X86-NEXT: popl %ebx -; X86-NEXT: .Ltmp4: -; X86-NEXT: addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp4-.L4$pb), %ebx -; X86-NEXT: movl %gs:0, %edx -; X86-NEXT: leal _TLS_MODULE_BASE_@tlsdesc(%ebx), %eax -; X86-NEXT: calll *_TLS_MODULE_BASE_@tlscall(%eax) -; X86-NEXT: movl z@DTPOFF(%eax,%edx), %ecx -; X86-NEXT: addl t@DTPOFF(%eax,%edx), %ecx -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: popl %ebx -; X86-NEXT: retl -; -; X32-LABEL: f5: -; X32: # %bb.0: -; X32-NEXT: pushq %rax -; X32-NEXT: movl %fs:0, %edx -; X32-NEXT: leal _TLS_MODULE_BASE_@tlsdesc(%rip), %eax -; X32-NEXT: callq *_TLS_MODULE_BASE_@tlscall(%eax) -; X32-NEXT: movl z@DTPOFF(%eax,%edx), %ecx -; X32-NEXT: addl t@DTPOFF(%eax,%edx), %ecx -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: popq %rcx -; X32-NEXT: retq -; -; X64-LABEL: f5: -; X64: # %bb.0: -; X64-NEXT: pushq %rax -; X64-NEXT: movq %fs:0, %rdx -; X64-NEXT: leaq _TLS_MODULE_BASE_@tlsdesc(%rip), %rax -; X64-NEXT: callq *_TLS_MODULE_BASE_@tlscall(%rax) -; X64-NEXT: movl z@DTPOFF(%rax,%rdx), %ecx -; X64-NEXT: addl t@DTPOFF(%rax,%rdx), %ecx -; X64-NEXT: movl %ecx, %eax -; X64-NEXT: popq %rcx -; X64-NEXT: retq - %1 = load i32, ptr @z, align 4 - %2 = load i32, ptr @t, align 4 - %3 = add nsw i32 %1, %2 - ret i32 %3 -} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits