Author: hans
Date: Tue Jan 19 12:53:02 2016
New Revision: 258162

URL: http://llvm.org/viewvc/llvm-project?rev=258162&view=rev
Log:
Merging r257925, r257929, r257930, and r257997:
------------------------------------------------------------------------
r257925 | mren | 2016-01-15 11:35:42 -0800 (Fri, 15 Jan 2016) | 10 lines

CXX_FAST_TLS calling convention: fix issue on X86-64.

When we have a single basic block, the explicit copy-back instructions should
be inserted right before the terminator. Before this fix, they were wrongly
placed at the beginning of the basic block.

I will commit fixes to other platforms as well.

PR26136
------------------------------------------------------------------------

------------------------------------------------------------------------
r257929 | mren | 2016-01-15 12:13:28 -0800 (Fri, 15 Jan 2016) | 10 lines

CXX_FAST_TLS calling convention: fix issue on AArch64.

When we have a single basic block, the explicit copy-back instructions should
be inserted right before the terminator. Before this fix, they were wrongly
placed at the beginning of the basic block.

I will commit fixes to other platforms as well.

PR26136
------------------------------------------------------------------------

------------------------------------------------------------------------
r257930 | mren | 2016-01-15 12:24:11 -0800 (Fri, 15 Jan 2016) | 8 lines

CXX_FAST_TLS calling convention: fix issue on ARM.

When we have a single basic block, the explicit copy-back instructions should
be inserted right before the terminator. Before this fix, they were wrongly
placed at the beginning of the basic block.

PR26136
------------------------------------------------------------------------

------------------------------------------------------------------------
r257997 | mren | 2016-01-16 08:39:46 -0800 (Sat, 16 Jan 2016) | 12 lines

CXX_FAST_TLS calling convention: fix issue on x86-64.

%RBP can't be handled explicitly. We generate the following code:
    pushq %rbp
    movq  %rsp, %rbp
    ...
    movq  %rbx, (%rbp)  ## 8-byte Spill
where the saved %rbp value at (%rbp) will be overwritten by the spilled value.

The fix is to let PEI (prologue/epilogue insertion) handle %RBP.
PR26136
------------------------------------------------------------------------

Modified:
    llvm/branches/release_38/   (props changed)
    llvm/branches/release_38/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/branches/release_38/lib/Target/ARM/ARMISelLowering.cpp
    llvm/branches/release_38/lib/Target/X86/X86CallingConv.td
    llvm/branches/release_38/lib/Target/X86/X86ISelLowering.cpp
    llvm/branches/release_38/test/CodeGen/AArch64/cxx-tlscc.ll
    llvm/branches/release_38/test/CodeGen/ARM/cxx-tlscc.ll
    llvm/branches/release_38/test/CodeGen/X86/cxx_tlscc64.ll

Propchange: llvm/branches/release_38/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue Jan 19 12:53:02 2016
@@ -1,3 +1,3 @@
 /llvm/branches/Apple/Pertwee:110850,110961
 /llvm/branches/type-system-rewrite:133420-134817
-/llvm/trunk:155241,257645,257648,257730,257775,257791,257902,257905
+/llvm/trunk:155241,257645,257648,257730,257775,257791,257902,257905,257925,257929-257930,257997

Modified: llvm/branches/release_38/lib/Target/AArch64/AArch64ISelLowering.cpp
URL: 
http://llvm.org/viewvc/llvm-project/llvm/branches/release_38/lib/Target/AArch64/AArch64ISelLowering.cpp?rev=258162&r1=258161&r2=258162&view=diff
==============================================================================
--- llvm/branches/release_38/lib/Target/AArch64/AArch64ISelLowering.cpp (original)
+++ llvm/branches/release_38/lib/Target/AArch64/AArch64ISelLowering.cpp Tue Jan 19 12:53:02 2016
@@ -10133,6 +10133,7 @@ void AArch64TargetLowering::insertCopies
 
   const TargetInstrInfo *TII = Subtarget->getInstrInfo();
   MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
+  MachineBasicBlock::iterator MBBI = Entry->begin();
   for (const MCPhysReg *I = IStart; *I; ++I) {
     const TargetRegisterClass *RC = nullptr;
     if (AArch64::GPR64RegClass.contains(*I))
@@ -10152,13 +10153,13 @@ void AArch64TargetLowering::insertCopies
                Attribute::NoUnwind) &&
            "Function should be nounwind in insertCopiesSplitCSR!");
     Entry->addLiveIn(*I);
-    BuildMI(*Entry, Entry->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
-            NewVR)
+    BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
         .addReg(*I);
 
+    // Insert the copy-back instructions right before the terminator.
     for (auto *Exit : Exits)
-      BuildMI(*Exit, Exit->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
-              *I)
+      BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
+              TII->get(TargetOpcode::COPY), *I)
           .addReg(NewVR);
   }
 }

Modified: llvm/branches/release_38/lib/Target/ARM/ARMISelLowering.cpp
URL: 
http://llvm.org/viewvc/llvm-project/llvm/branches/release_38/lib/Target/ARM/ARMISelLowering.cpp?rev=258162&r1=258161&r2=258162&view=diff
==============================================================================
--- llvm/branches/release_38/lib/Target/ARM/ARMISelLowering.cpp (original)
+++ llvm/branches/release_38/lib/Target/ARM/ARMISelLowering.cpp Tue Jan 19 12:53:02 2016
@@ -12423,6 +12423,7 @@ void ARMTargetLowering::insertCopiesSpli
 
   const TargetInstrInfo *TII = Subtarget->getInstrInfo();
   MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
+  MachineBasicBlock::iterator MBBI = Entry->begin();
   for (const MCPhysReg *I = IStart; *I; ++I) {
     const TargetRegisterClass *RC = nullptr;
     if (ARM::GPRRegClass.contains(*I))
@@ -12442,13 +12443,13 @@ void ARMTargetLowering::insertCopiesSpli
                Attribute::NoUnwind) &&
            "Function should be nounwind in insertCopiesSplitCSR!");
     Entry->addLiveIn(*I);
-    BuildMI(*Entry, Entry->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
-            NewVR)
+    BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
         .addReg(*I);
 
+    // Insert the copy-back instructions right before the terminator.
     for (auto *Exit : Exits)
-      BuildMI(*Exit, Exit->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
-              *I)
+      BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
+              TII->get(TargetOpcode::COPY), *I)
           .addReg(NewVR);
   }
 }

Modified: llvm/branches/release_38/lib/Target/X86/X86CallingConv.td
URL: 
http://llvm.org/viewvc/llvm-project/llvm/branches/release_38/lib/Target/X86/X86CallingConv.td?rev=258162&r1=258161&r2=258162&view=diff
==============================================================================
--- llvm/branches/release_38/lib/Target/X86/X86CallingConv.td (original)
+++ llvm/branches/release_38/lib/Target/X86/X86CallingConv.td Tue Jan 19 12:53:02 2016
@@ -832,10 +832,10 @@ def CSR_64_TLS_Darwin : CalleeSavedRegs<
                                              R8, R9, R10, R11)>;
 
 // CSRs that are handled by prologue, epilogue.
-def CSR_64_CXX_TLS_Darwin_PE : CalleeSavedRegs<(add)>;
+def CSR_64_CXX_TLS_Darwin_PE : CalleeSavedRegs<(add RBP)>;
 
 // CSRs that are handled explicitly via copies.
-def CSR_64_CXX_TLS_Darwin_ViaCopy : CalleeSavedRegs<(add CSR_64_TLS_Darwin)>;
+def CSR_64_CXX_TLS_Darwin_ViaCopy : CalleeSavedRegs<(sub CSR_64_TLS_Darwin, RBP)>;
 
 // All GPRs - except r11
 def CSR_64_RT_MostRegs : CalleeSavedRegs<(add CSR_64, RAX, RCX, RDX, RSI, RDI,

Modified: llvm/branches/release_38/lib/Target/X86/X86ISelLowering.cpp
URL: 
http://llvm.org/viewvc/llvm-project/llvm/branches/release_38/lib/Target/X86/X86ISelLowering.cpp?rev=258162&r1=258161&r2=258162&view=diff
==============================================================================
--- llvm/branches/release_38/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/branches/release_38/lib/Target/X86/X86ISelLowering.cpp Tue Jan 19 12:53:02 2016
@@ -28908,6 +28908,7 @@ void X86TargetLowering::insertCopiesSpli
 
   const TargetInstrInfo *TII = Subtarget->getInstrInfo();
   MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
+  MachineBasicBlock::iterator MBBI = Entry->begin();
   for (const MCPhysReg *I = IStart; *I; ++I) {
     const TargetRegisterClass *RC = nullptr;
     if (X86::GR64RegClass.contains(*I))
@@ -28925,13 +28926,13 @@ void X86TargetLowering::insertCopiesSpli
                Attribute::NoUnwind) &&
            "Function should be nounwind in insertCopiesSplitCSR!");
     Entry->addLiveIn(*I);
-    BuildMI(*Entry, Entry->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
-            NewVR)
+    BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
         .addReg(*I);
 
+    // Insert the copy-back instructions right before the terminator.
     for (auto *Exit : Exits)
-      BuildMI(*Exit, Exit->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
-              *I)
+      BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
+              TII->get(TargetOpcode::COPY), *I)
           .addReg(NewVR);
   }
 }

Modified: llvm/branches/release_38/test/CodeGen/AArch64/cxx-tlscc.ll
URL: 
http://llvm.org/viewvc/llvm-project/llvm/branches/release_38/test/CodeGen/AArch64/cxx-tlscc.ll?rev=258162&r1=258161&r2=258162&view=diff
==============================================================================
--- llvm/branches/release_38/test/CodeGen/AArch64/cxx-tlscc.ll (original)
+++ llvm/branches/release_38/test/CodeGen/AArch64/cxx-tlscc.ll Tue Jan 19 12:53:02 2016
@@ -8,6 +8,7 @@
 @sg = internal thread_local global %struct.S zeroinitializer, align 1
 @__dso_handle = external global i8
 @__tls_guard = internal thread_local unnamed_addr global i1 false
+@sum1 = internal thread_local global i32 0, align 4
 
 declare %struct.S* @_ZN1SC1Ev(%struct.S* returned)
 declare %struct.S* @_ZN1SD1Ev(%struct.S* returned)
@@ -74,3 +75,29 @@ __tls_init.exit:
 ; CHECK-NOT: ldp d27, d26
 ; CHECK-NOT: ldp d29, d28
 ; CHECK-NOT: ldp d31, d30
+
+; CHECK-LABEL: _ZTW4sum1
+; CHECK-NOT: stp d31, d30
+; CHECK-NOT: stp d29, d28
+; CHECK-NOT: stp d27, d26
+; CHECK-NOT: stp d25, d24
+; CHECK-NOT: stp d23, d22
+; CHECK-NOT: stp d21, d20
+; CHECK-NOT: stp d19, d18
+; CHECK-NOT: stp d17, d16
+; CHECK-NOT: stp d7, d6
+; CHECK-NOT: stp d5, d4
+; CHECK-NOT: stp d3, d2
+; CHECK-NOT: stp d1, d0
+; CHECK-NOT: stp x20, x19
+; CHECK-NOT: stp x14, x13
+; CHECK-NOT: stp x12, x11
+; CHECK-NOT: stp x10, x9
+; CHECK-NOT: stp x8, x7
+; CHECK-NOT: stp x6, x5
+; CHECK-NOT: stp x4, x3
+; CHECK-NOT: stp x2, x1
+; CHECK: blr
+define cxx_fast_tlscc nonnull i32* @_ZTW4sum1() nounwind {
+  ret i32* @sum1
+}

Modified: llvm/branches/release_38/test/CodeGen/ARM/cxx-tlscc.ll
URL: 
http://llvm.org/viewvc/llvm-project/llvm/branches/release_38/test/CodeGen/ARM/cxx-tlscc.ll?rev=258162&r1=258161&r2=258162&view=diff
==============================================================================
--- llvm/branches/release_38/test/CodeGen/ARM/cxx-tlscc.ll (original)
+++ llvm/branches/release_38/test/CodeGen/ARM/cxx-tlscc.ll Tue Jan 19 12:53:02 2016
@@ -8,6 +8,7 @@
 @sg = internal thread_local global %struct.S zeroinitializer, align 1
 @__dso_handle = external global i8
 @__tls_guard = internal thread_local unnamed_addr global i1 false
+@sum1 = internal thread_local global i32 0, align 4
 
 declare %struct.S* @_ZN1SC1Ev(%struct.S* returned)
 declare %struct.S* @_ZN1SD1Ev(%struct.S* returned)
@@ -44,3 +45,13 @@ __tls_init.exit:
 ; CHECK-NOT: pop {r9, r12}
 ; CHECK-NOT: pop {r1, r2, r3, r4, r7, pc}
 ; CHECK: pop {lr}
+
+; CHECK-LABEL: _ZTW4sum1
+; CHECK-NOT: push {r1, r2, r3, r4, r7, lr}
+; CHECK-NOT: push {r9, r12}
+; CHECK-NOT: vpush {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
+; CHECK-NOT: vpush {d0, d1, d2, d3, d4, d5, d6, d7}
+; CHECK: blx
+define cxx_fast_tlscc nonnull i32* @_ZTW4sum1() nounwind {
+  ret i32* @sum1
+}

Modified: llvm/branches/release_38/test/CodeGen/X86/cxx_tlscc64.ll
URL: 
http://llvm.org/viewvc/llvm-project/llvm/branches/release_38/test/CodeGen/X86/cxx_tlscc64.ll?rev=258162&r1=258161&r2=258162&view=diff
==============================================================================
--- llvm/branches/release_38/test/CodeGen/X86/cxx_tlscc64.ll (original)
+++ llvm/branches/release_38/test/CodeGen/X86/cxx_tlscc64.ll Tue Jan 19 12:53:02 2016
@@ -4,11 +4,13 @@
 ; tricks similar to AArch64 fast TLS calling convention (r255821).
 ; Applying tricks on x86-64 similar to r255821.
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -enable-shrink-wrap=true | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -O0 | FileCheck %s --check-prefix=CHECK-O0
 %struct.S = type { i8 }
 
 @sg = internal thread_local global %struct.S zeroinitializer, align 1
 @__dso_handle = external global i8
 @__tls_guard = internal thread_local unnamed_addr global i1 false
+@sum1 = internal thread_local global i32 0, align 4
 
 declare void @_ZN1SC1Ev(%struct.S*)
 declare void @_ZN1SD1Ev(%struct.S*)
@@ -50,3 +52,28 @@ init.i:
 __tls_init.exit:
   ret %struct.S* @sg
 }
+
+; CHECK-LABEL: _ZTW4sum1
+; CHECK-NOT: pushq %r11
+; CHECK-NOT: pushq %r10
+; CHECK-NOT: pushq %r9
+; CHECK-NOT: pushq %r8
+; CHECK-NOT: pushq %rsi
+; CHECK-NOT: pushq %rdx
+; CHECK-NOT: pushq %rcx
+; CHECK-NOT: pushq %rbx
+; CHECK: callq
+define cxx_fast_tlscc nonnull i32* @_ZTW4sum1() nounwind {
+  ret i32* @sum1
+}
+
+; Make sure at O0 we don't overwrite RBP.
+; CHECK-O0-LABEL: _ZTW4sum2
+; CHECK-O0: pushq %rbp
+; CHECK-O0: movq %rsp, %rbp
+; CHECK-O0-NOT: movq %r{{.*}}, (%rbp) 
+define cxx_fast_tlscc i32* @_ZTW4sum2() #0 {
+  ret i32* @sum1
+}
+
+attributes #0 = { nounwind "no-frame-pointer-elim"="true" }


_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to